Skip to content

Commit 3cffac3

Browse files
SergiySW authored and rkeene committed
Lazy bootstrap improvements (#1427)
1 parent 1ac017d commit 3cffac3

File tree

11 files changed

+200
-48
lines changed

11 files changed

+200
-48
lines changed

rai/core_test/network.cpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -768,6 +768,43 @@ TEST (bootstrap_processor, lazy_hash)
768768
node1->stop ();
769769
}
770770

771+
// Lazy bootstrap test: node 0 holds a chain of state blocks, a second node (node1) is
// given only the hash of the last block (change3) and must eventually be able to
// return that block.
// NOTE(review): the test name suggests it is meant to exercise the lazy_max_pull_blocks
// limit used elsewhere in this commit — confirm against that constant's value.
TEST (bootstrap_processor, lazy_max_pull_count)
772+
{
773+
rai::system system (24000, 1);
774+
rai::node_init init1;
775+
rai::genesis genesis;
776+
rai::keypair key1;
777+
rai::keypair key2;
778+
// Generating test chain: seven state blocks (send1, receive1, send2, receive2, change1,
// change2, change3); each one is built on the genesis hash or on the hash of the block
// created just before it, and work is generated on node 0
779+
auto send1 (std::make_shared<rai::state_block> (rai::test_genesis_key.pub, genesis.hash (), rai::test_genesis_key.pub, rai::genesis_amount - rai::Gxrb_ratio, key1.pub, rai::test_genesis_key.prv, rai::test_genesis_key.pub, system.nodes[0]->work_generate_blocking (genesis.hash ())));
780+
auto receive1 (std::make_shared<rai::state_block> (key1.pub, 0, key1.pub, rai::Gxrb_ratio, send1->hash (), key1.prv, key1.pub, system.nodes[0]->work_generate_blocking (key1.pub)));
781+
auto send2 (std::make_shared<rai::state_block> (key1.pub, receive1->hash (), key1.pub, 0, key2.pub, key1.prv, key1.pub, system.nodes[0]->work_generate_blocking (receive1->hash ())));
782+
auto receive2 (std::make_shared<rai::state_block> (key2.pub, 0, key2.pub, rai::Gxrb_ratio, send2->hash (), key2.prv, key2.pub, system.nodes[0]->work_generate_blocking (key2.pub)));
783+
auto change1 (std::make_shared<rai::state_block> (key2.pub, receive2->hash (), key1.pub, rai::Gxrb_ratio, 0, key2.prv, key2.pub, system.nodes[0]->work_generate_blocking (receive2->hash ())));
784+
auto change2 (std::make_shared<rai::state_block> (key2.pub, change1->hash (), rai::test_genesis_key.pub, rai::Gxrb_ratio, 0, key2.prv, key2.pub, system.nodes[0]->work_generate_blocking (change1->hash ())));
785+
auto change3 (std::make_shared<rai::state_block> (key2.pub, change2->hash (), key2.pub, rai::Gxrb_ratio, 0, key2.prv, key2.pub, system.nodes[0]->work_generate_blocking (change2->hash ())));
786+
// Processing test chain: queue every block on node 0's block processor in creation
// order, then flush the processor before the second node is created
787+
system.nodes[0]->block_processor.add (send1, std::chrono::steady_clock::time_point ());
788+
system.nodes[0]->block_processor.add (receive1, std::chrono::steady_clock::time_point ());
789+
system.nodes[0]->block_processor.add (send2, std::chrono::steady_clock::time_point ());
790+
system.nodes[0]->block_processor.add (receive2, std::chrono::steady_clock::time_point ());
791+
system.nodes[0]->block_processor.add (change1, std::chrono::steady_clock::time_point ());
792+
system.nodes[0]->block_processor.add (change2, std::chrono::steady_clock::time_point ());
793+
system.nodes[0]->block_processor.add (change3, std::chrono::steady_clock::time_point ());
794+
system.nodes[0]->block_processor.flush ();
795+
// Start lazy bootstrap with last block in chain known: node1 listens on port 24001,
// gets node 0's endpoint as its peer, and is handed only change3's hash
796+
auto node1 (std::make_shared<rai::node> (init1, system.service, 24001, rai::unique_path (), system.alarm, system.logging, system.work));
797+
node1->peers.insert (system.nodes[0]->network.endpoint (), rai::protocol_version);
798+
node1->bootstrap_initiator.bootstrap_lazy (change3->hash ());
799+
// Check processed blocks: keep polling the system until node1 can return change3
// (a 10s deadline is set first; presumably poll () reports an error once it passes — confirm)
800+
system.deadline_set (10s);
801+
while (node1->block (change3->hash ()) == nullptr)
802+
{
803+
ASSERT_NO_ERROR (system.poll ());
804+
}
805+
node1->stop ();
806+
}
807+
771808
TEST (frontier_req_response, DISABLED_destruction)
772809
{
773810
{

rai/node/bootstrap.cpp

Lines changed: 114 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -394,10 +394,10 @@ void rai::frontier_req_client::next (rai::transaction const & transaction_a)
394394

395395
// Constructs a bulk pull client for the given bootstrap connection and pull description.
// Stores connection_a and pull_a, starts this client's total_blocks counter at zero, and,
// while holding the attempt's mutex, notifies everything waiting on the attempt's
// condition variable.
rai::bulk_pull_client::bulk_pull_client (std::shared_ptr<rai::bootstrap_client> connection_a, rai::pull_info const & pull_a) :
396396
connection (connection_a),
397+
total_blocks (0),
397398
pull (pull_a)
398399
{
399400
std::lock_guard<std::mutex> mutex (connection->attempt->mutex);
// Diff note: the increment below is marked '-' (removed by this commit); the same commit
// adds a `++pulling;` line in the bootstrap_attempt::request_pull hunk instead.
400-
++connection->attempt->pulling;
401401
connection->attempt->condition.notify_all ();
402402
}
403403

@@ -407,6 +407,10 @@ rai::bulk_pull_client::~bulk_pull_client ()
407407
if (expected != pull.end)
408408
{
409409
pull.head = expected;
410+
if (connection->attempt->lazy_mode)
411+
{
412+
pull.account = expected;
413+
}
410414
connection->attempt->requeue_pull (pull);
411415
if (connection->node->config.logging.bulk_pull_logging ())
412416
{
@@ -552,25 +556,32 @@ void rai::bulk_pull_client::received_block (boost::system::error_code const & ec
552556
block->serialize_json (block_l);
553557
BOOST_LOG (connection->node->log) << boost::str (boost::format ("Pulled block %1% %2%") % hash.to_string () % block_l);
554558
}
559+
bool block_expected (false);
555560
if (hash == expected)
556561
{
557562
expected = block->previous ();
563+
block_expected = true;
558564
}
559565
if (connection->block_count++ == 0)
560566
{
561567
connection->start_time = std::chrono::steady_clock::now ();
562568
}
563569
connection->attempt->total_blocks++;
564-
bool stop_pull (connection->attempt->process_block (block));
570+
total_blocks++;
571+
bool stop_pull (connection->attempt->process_block (block, total_blocks, block_expected));
565572
if (!stop_pull && !connection->hard_stop.load ())
566573
{
567574
receive_block ();
568575
}
569-
else if (stop_pull && expected == block->previous ())
576+
else if (stop_pull && block_expected)
570577
{
571578
expected = pull.end;
572579
connection->attempt->pool_connection (connection);
573580
}
581+
if (stop_pull)
582+
{
583+
connection->attempt->lazy_stopped++;
584+
}
574585
}
575586
else
576587
{
@@ -738,17 +749,11 @@ pulling (0),
738749
node (node_a),
739750
account_count (0),
740751
total_blocks (0),
752+
lazy_stopped (0),
741753
stopped (false),
742754
lazy_mode (false)
743755
{
744-
if (lazy_mode)
745-
{
746-
BOOST_LOG (node->log) << "Starting lazy-bootstrap attempt";
747-
}
748-
else
749-
{
750-
BOOST_LOG (node->log) << "Starting bootstrap attempt";
751-
}
756+
BOOST_LOG (node->log) << "Starting bootstrap attempt";
752757
node->bootstrap_initiator.notify_listeners (true);
753758
}
754759

@@ -814,6 +819,17 @@ void rai::bootstrap_attempt::request_pull (std::unique_lock<std::mutex> & lock_a
814819
{
815820
auto pull (pulls.front ());
816821
pulls.pop_front ();
822+
if (lazy_mode)
823+
{
824+
// Check if pull is obsolete (head was processed)
825+
std::unique_lock<std::mutex> lock (lazy_mutex);
826+
while (!pulls.empty () && !pull.head.is_zero () && lazy_blocks.find (pull.head) != lazy_blocks.end ())
827+
{
828+
pull = pulls.front ();
829+
pulls.pop_front ();
830+
}
831+
}
832+
++pulling;
817833
// The bulk_pull_client destructor attempt to requeue_pull which can cause a deadlock if this is the last reference
818834
// Dispatch request in an external thread in case it needs to be destroyed
819835
node->background ([connection_l, pull]() {
@@ -900,15 +916,16 @@ void rai::bootstrap_attempt::run ()
900916
if (!stopped)
901917
{
902918
BOOST_LOG (node->log) << "Completed pulls";
919+
request_push (lock);
903920
// Start lazy bootstrap if some lazy keys were inserted
904921
if (!lazy_keys.empty ())
905922
{
906923
lock.unlock ();
924+
lazy_mode = true;
907925
lazy_run ();
908926
lock.lock ();
909927
}
910928
}
911-
request_push (lock);
912929
stopped = true;
913930
condition.notify_all ();
914931
idle.clear ();
@@ -1156,19 +1173,19 @@ void rai::bootstrap_attempt::add_bulk_push_target (rai::block_hash const & head,
11561173
// Registers hash_a as a starting point for lazy bootstrap while holding lazy_mutex.
// New form ('+' lines): the hash is inserted into lazy_keys and appended to lazy_pulls
// only when lazy_keys holds fewer than max_keys entries and the hash is present in
// neither lazy_keys nor lazy_blocks.
// Old form ('-' lines): insertion was guarded only by the lazy_keys lookup and was
// followed by an unconditional lazy_add (hash_a) call.
void rai::bootstrap_attempt::lazy_start (rai::block_hash const & hash_a)
11571174
{
11581175
std::unique_lock<std::mutex> lock (lazy_mutex);
1159-
// Add start blocks
1160-
if (lazy_keys.find (hash_a) == lazy_keys.end ())
1176+
// Add start blocks, limit 1024 (32k with disabled legacy bootstrap)
1177+
size_t max_keys (node->flags.disable_legacy_bootstrap ? 32 * 1024 : 1024);
1178+
if (lazy_keys.size () < max_keys && lazy_keys.find (hash_a) == lazy_keys.end () && lazy_blocks.find (hash_a) == lazy_blocks.end ())
11611179
{
11621180
lazy_keys.insert (hash_a);
1181+
// Queue the hash directly for pulling (this takes the place of the removed lazy_add call below)
lazy_pulls.push_back (hash_a);
11631182
}
1164-
lazy_add (hash_a);
11651183
}
11661184

11671185
void rai::bootstrap_attempt::lazy_add (rai::block_hash const & hash_a)
11681186
{
11691187
// Add only unknown blocks
11701188
assert (!lazy_mutex.try_lock ());
1171-
11721189
if (lazy_blocks.find (hash_a) == lazy_blocks.end ())
11731190
{
11741191
lazy_pulls.push_back (hash_a);
@@ -1183,7 +1200,7 @@ void rai::bootstrap_attempt::lazy_pull_flush ()
11831200
// Recheck if block was already processed
11841201
if (lazy_blocks.find (pull_start) == lazy_blocks.end ())
11851202
{
1186-
add_pull (rai::pull_info (pull_start, pull_start, rai::block_hash (0)));
1203+
add_pull (rai::pull_info (pull_start, pull_start, rai::block_hash (0), lazy_max_pull_blocks));
11871204
}
11881205
}
11891206
lazy_pulls.clear ();
@@ -1194,7 +1211,7 @@ bool rai::bootstrap_attempt::lazy_finished ()
11941211
bool result (true);
11951212
auto transaction (node->store.tx_begin_read ());
11961213
std::unique_lock<std::mutex> lock (lazy_mutex);
1197-
for (auto it (lazy_keys.begin ()), end (lazy_keys.end ()); it != end;)
1214+
for (auto it (lazy_keys.begin ()), end (lazy_keys.end ()); it != end && !stopped;)
11981215
{
11991216
if (node->store.block_exists (transaction, *it))
12001217
{
@@ -1208,18 +1225,24 @@ bool rai::bootstrap_attempt::lazy_finished ()
12081225
// No need to increment `it` as we break above.
12091226
}
12101227
}
1228+
// Finish lazy bootstrap without lazy pulls (in combination with still_pulling ())
1229+
if (!result && lazy_pulls.empty ())
1230+
{
1231+
result = true;
1232+
}
12111233
return result;
12121234
}
12131235

12141236
void rai::bootstrap_attempt::lazy_run ()
12151237
{
12161238
populate_connections ();
12171239
auto start_time (std::chrono::steady_clock::now ());
1218-
auto max_time (std::chrono::milliseconds (30 * 60 * 1000));
1240+
auto max_time (std::chrono::minutes (node->flags.disable_legacy_bootstrap ? 48 * 60 : 30));
12191241
std::unique_lock<std::mutex> lock (mutex);
12201242
while ((still_pulling () || !lazy_finished ()) && std::chrono::steady_clock::now () - start_time < max_time)
12211243
{
1222-
while (still_pulling ())
1244+
unsigned iterations (0);
1245+
while (still_pulling () && std::chrono::steady_clock::now () - start_time < max_time)
12231246
{
12241247
if (!pulls.empty ())
12251248
{
@@ -1236,6 +1259,14 @@ void rai::bootstrap_attempt::lazy_run ()
12361259
{
12371260
condition.wait (lock);
12381261
}
1262+
++iterations;
1263+
// Flushing lazy pulls
1264+
if (iterations % 100 == 0)
1265+
{
1266+
lock.unlock ();
1267+
lazy_pull_flush ();
1268+
lock.lock ();
1269+
}
12391270
}
12401271
// Flushing may resolve forks which can add more pulls
12411272
// Flushing lazy pulls
@@ -1253,10 +1284,10 @@ void rai::bootstrap_attempt::lazy_run ()
12531284
idle.clear ();
12541285
}
12551286

1256-
bool rai::bootstrap_attempt::process_block (std::shared_ptr<rai::block> block_a)
1287+
bool rai::bootstrap_attempt::process_block (std::shared_ptr<rai::block> block_a, uint64_t total_blocks, bool block_expected)
12571288
{
12581289
bool stop_pull (false);
1259-
if (lazy_mode)
1290+
if (lazy_mode && block_expected)
12601291
{
12611292
auto hash (block_a->hash ());
12621293
std::unique_lock<std::mutex> lock (lazy_mutex);
@@ -1267,49 +1298,90 @@ bool rai::bootstrap_attempt::process_block (std::shared_ptr<rai::block> block_a)
12671298
auto transaction (node->store.tx_begin_read ());
12681299
if (!node->store.block_exists (transaction, hash))
12691300
{
1270-
lazy_blocks.insert (hash);
1301+
rai::uint128_t balance (std::numeric_limits<rai::uint128_t>::max ());
12711302
node->block_processor.add (block_a, std::chrono::steady_clock::time_point ());
12721303
// Search for new dependencies
12731304
if (!block_a->source ().is_zero () && !node->store.block_exists (transaction, block_a->source ()))
12741305
{
12751306
lazy_add (block_a->source ());
12761307
}
1308+
else if (block_a->type () == rai::block_type::send)
1309+
{
1310+
// Calculate balance for legacy send blocks
1311+
std::shared_ptr<rai::send_block> block_l (std::static_pointer_cast<rai::send_block> (block_a));
1312+
if (block_l != nullptr)
1313+
{
1314+
balance = block_l->hashables.balance.number ();
1315+
}
1316+
}
12771317
else if (block_a->type () == rai::block_type::state)
12781318
{
12791319
std::shared_ptr<rai::state_block> block_l (std::static_pointer_cast<rai::state_block> (block_a));
12801320
if (block_l != nullptr)
12811321
{
1322+
balance = block_l->hashables.balance.number ();
12821323
rai::block_hash link (block_l->hashables.link);
12831324
// If link is not epoch link or 0. And if block from link unknown
12841325
if (!link.is_zero () && link != node->ledger.epoch_link && lazy_blocks.find (link) == lazy_blocks.end () && !node->store.block_exists (transaction, link))
12851326
{
1327+
rai::block_hash previous (block_l->hashables.previous);
12861328
// If state block previous is 0 then source block required
1287-
if (block_l->hashables.previous.is_zero ())
1329+
if (previous.is_zero ())
12881330
{
12891331
lazy_add (link);
12901332
}
12911333
// In other cases previous block balance required to find out subtype of state block
1292-
else if (node->store.block_exists (transaction, block_l->hashables.previous))
1334+
else if (node->store.block_exists (transaction, previous))
12931335
{
1294-
rai::amount prev_balance (node->ledger.balance (transaction, block_l->hashables.previous));
1295-
if (prev_balance.number () <= block_l->hashables.balance.number ())
1336+
rai::amount prev_balance (node->ledger.balance (transaction, previous));
1337+
if (prev_balance.number () <= balance)
12961338
{
12971339
lazy_add (link);
12981340
}
12991341
}
1342+
// Search balance of already processed previous blocks
1343+
else if (lazy_blocks.find (previous) != lazy_blocks.end ())
1344+
{
1345+
auto previous_balance (lazy_balances.find (previous));
1346+
if (previous_balance != lazy_balances.end ())
1347+
{
1348+
if (previous_balance->second <= balance)
1349+
{
1350+
lazy_add (link);
1351+
}
1352+
lazy_balances.erase (previous_balance);
1353+
}
1354+
}
1355+
// Insert in unknown state blocks if previous wasn't already processed
13001356
else
13011357
{
1302-
lazy_state_unknown.insert (std::make_pair (block_l->hashables.previous, block_l));
1358+
lazy_state_unknown.insert (std::make_pair (previous, std::make_pair (link, balance)));
13031359
}
13041360
}
13051361
}
13061362
}
1363+
lazy_blocks.insert (hash);
1364+
// Adding lazy balances
1365+
if (total_blocks == 0)
1366+
{
1367+
lazy_balances.insert (std::make_pair (hash, balance));
1368+
}
1369+
// Removing lazy balances
1370+
if (!block_a->previous ().is_zero () && lazy_balances.find (block_a->previous ()) != lazy_balances.end ())
1371+
{
1372+
lazy_balances.erase (block_a->previous ());
1373+
}
13071374
}
13081375
// Drop bulk_pull if block is already known (ledger)
13091376
else
13101377
{
13111378
// Disabled until server rewrite
13121379
// stop_pull = true;
1380+
// Force drop lazy bootstrap connection for long bulk_pull
1381+
if (total_blocks > lazy_max_pull_blocks)
1382+
{
1383+
stop_pull = true;
1384+
}
13131385
}
13141386
//Search unknown state blocks balances
13151387
auto find_state (lazy_state_unknown.find (hash));
@@ -1321,18 +1393,18 @@ bool rai::bootstrap_attempt::process_block (std::shared_ptr<rai::block> block_a)
13211393
if (block_a->type () == rai::block_type::state)
13221394
{
13231395
std::shared_ptr<rai::state_block> block_l (std::static_pointer_cast<rai::state_block> (block_a));
1324-
if (block_l->hashables.balance.number () <= next_block->hashables.balance.number ())
1396+
if (block_l->hashables.balance.number () <= next_block.second)
13251397
{
1326-
lazy_add (next_block->hashables.link);
1398+
lazy_add (next_block.first);
13271399
}
13281400
}
13291401
// Retrieve balance for previous legacy send blocks
13301402
else if (block_a->type () == rai::block_type::send)
13311403
{
13321404
std::shared_ptr<rai::send_block> block_l (std::static_pointer_cast<rai::send_block> (block_a));
1333-
if (block_l->hashables.balance.number () <= next_block->hashables.balance.number ())
1405+
if (block_l->hashables.balance.number () <= next_block.second)
13341406
{
1335-
lazy_add (next_block->hashables.link);
1407+
lazy_add (next_block.first);
13361408
}
13371409
}
13381410
// Weak assumption for other legacy block types
@@ -1347,8 +1419,18 @@ bool rai::bootstrap_attempt::process_block (std::shared_ptr<rai::block> block_a)
13471419
{
13481420
// Disabled until server rewrite
13491421
// stop_pull = true;
1422+
// Force drop lazy bootstrap connection for long bulk_pull
1423+
if (total_blocks > lazy_max_pull_blocks)
1424+
{
1425+
stop_pull = true;
1426+
}
13501427
}
13511428
}
1429+
else if (lazy_mode)
1430+
{
1431+
// Drop connection with unexpected block for lazy bootstrap
1432+
stop_pull = true;
1433+
}
13521434
else
13531435
{
13541436
node->block_processor.add (block_a, std::chrono::steady_clock::time_point ());

0 commit comments

Comments
 (0)