From b4b74d72f03338d8a82c88dea551a12d84ea2f45 Mon Sep 17 00:00:00 2001 From: Yinan Xu Date: Wed, 30 Aug 2023 19:02:55 +0800 Subject: [PATCH] Add prepareClone and atClone APIs for Verilated models (#3503) (#4444) This API is used if the user copies the process using `fork` and similar OS-level mechanisms. The `at_clone` member function ensures that all model-allocated resources are re-allocated, such that the copied child process/model can simulate correctly. A typical allocated resource is the thread pool, which every model has its own pool. --- docs/guide/connecting.rst | 37 +++++++++++++ include/verilated.cpp | 8 +++ include/verilated.h | 2 + src/V3EmitCModel.cpp | 15 +++++ src/V3EmitCSyms.cpp | 2 +- test_regress/t/t_flag_csplit.pl | 2 + test_regress/t/t_wrapper_clone.cpp | 89 ++++++++++++++++++++++++++++++ test_regress/t/t_wrapper_clone.out | 15 +++++ test_regress/t/t_wrapper_clone.pl | 25 +++++++++ test_regress/t/t_wrapper_clone.v | 38 +++++++++++++ 10 files changed, 232 insertions(+), 1 deletion(-) create mode 100644 test_regress/t/t_wrapper_clone.cpp create mode 100644 test_regress/t/t_wrapper_clone.out create mode 100755 test_regress/t/t_wrapper_clone.pl create mode 100644 test_regress/t/t_wrapper_clone.v diff --git a/docs/guide/connecting.rst b/docs/guide/connecting.rst index 10b35eec90..2ef1b35bc9 100644 --- a/docs/guide/connecting.rst +++ b/docs/guide/connecting.rst @@ -128,6 +128,43 @@ in the distribution. These headers use Doxygen comments, `///` and `//<`, to indicate and document those functions that are part of the Verilated public API. +Process-Level Clone APIs +-------------------------- + +Modern operating systems support process-level clone (a.k.a copying, forking) +with system call interfaces in C/C++, e.g., :code:`fork()` in Linux. + +However, after cloning a parent process, some resources cannot be inherited +in the child process. 
For example, in POSIX systems, when you fork a process, +the child process inherits all the memory of the parent process. However, +only the thread that called fork is replicated in the child process. Other +threads are not. + +Therefore, to support process-level clone mechanisms, Verilator provides the +:code:`prepareClone()` and :code:`atClone()` APIs, which allow the user to manually +re-construct the model in the child process. :code:`prepareClone()` releases +resources before cloning, and :code:`atClone()` re-allocates them after cloning. + +Both APIs are available on every Verilated model. Here is an example of usage +with the Linux :code:`fork()` and :code:`pthread_atfork()` APIs: + +.. code-block:: C++ + + // capture-less lambdas (topp is a static model pointer) usable as pthread_atfork handlers + static auto prepareClone = [](){ topp->prepareClone(); }; + static auto atClone = [](){ topp->atClone(); }; + + // in main function, register the handlers: + pthread_atfork(prepareClone, atClone, atClone); + +For better flexibility, you can also manually call the handlers before and +after :code:`fork()`. + +With the process-level clone APIs, users can create process-level snapshots +for the verilated models. While the Verilator save/restore option provides +persistent and circuit-independent snapshots, the process-level clone APIs +enable in-memory, circuit-transparent, and highly efficient snapshots. 
+ Direct Programming Interface (DPI) ================================== diff --git a/include/verilated.cpp b/include/verilated.cpp index a77094f651..6ffa7f3d86 100644 --- a/include/verilated.cpp +++ b/include/verilated.cpp @@ -2608,6 +2608,14 @@ VerilatedVirtualBase* VerilatedContext::threadPoolp() { return m_threadPool.get(); } +void VerilatedContext::prepareClone() { delete m_threadPool.release(); } + +VerilatedVirtualBase* VerilatedContext::threadPoolpOnClone() { + if (VL_UNLIKELY(m_threadPool)) m_threadPool.release(); + m_threadPool = std::make_unique<VlThreadPool>(this, m_threads - 1); + return m_threadPool.get(); +} + VerilatedVirtualBase* VerilatedContext::enableExecutionProfiler(VerilatedVirtualBase* (*construct)(VerilatedContext&)) { if (!m_executionProfiler) m_executionProfiler.reset(construct(*this)); diff --git a/include/verilated.h b/include/verilated.h index 4ee19b681f..250f4a83de 100644 --- a/include/verilated.h +++ b/include/verilated.h @@ -568,6 +568,8 @@ class VerilatedContext VL_NOT_FINAL { void addModel(VerilatedModel*); VerilatedVirtualBase* threadPoolp(); + void prepareClone(); + VerilatedVirtualBase* threadPoolpOnClone(); VerilatedVirtualBase* enableExecutionProfiler(VerilatedVirtualBase* (*construct)(VerilatedContext&)); diff --git a/src/V3EmitCModel.cpp b/src/V3EmitCModel.cpp index d637088287..3f988ed324 100644 --- a/src/V3EmitCModel.cpp +++ b/src/V3EmitCModel.cpp @@ -234,6 +234,12 @@ class EmitCModel final : public EmitCFunc { puts("const char* hierName() const override final;\n"); puts("const char* modelName() const override final;\n"); puts("unsigned threads() const override final;\n"); + puts("/// Prepare for cloning the model at the process level (e.g. fork in Linux)\n"); + puts("/// Release necessary resources. Called before cloning.\n"); + puts("void prepareClone() const;\n"); + puts("/// Re-init after cloning the model at the process level (e.g. fork in Linux)\n"); + puts("/// Re-allocate necessary resources. 
Called after cloning.\n"); + puts("void atClone() const;\n"); if (v3Global.opt.trace()) { puts("std::unique_ptr<VerilatedTraceConfig> traceConfig() const override final;\n"); } @@ -479,6 +485,15 @@ class EmitCModel final : public EmitCFunc { + "\"; }\n"); puts("unsigned " + topClassName() + "::threads() const { return " + cvtToStr(std::max(1, v3Global.opt.threads())) + "; }\n"); + puts("void " + topClassName() + + "::prepareClone() const { contextp()->prepareClone(); }\n"); + puts("void " + topClassName() + "::atClone() const {\n"); + if (v3Global.opt.threads() > 1) { + puts("vlSymsp->__Vm_threadPoolp = static_cast<VlThreadPool*>("); + } + puts("contextp()->threadPoolpOnClone()"); + if (v3Global.opt.threads() > 1) puts(")"); + puts(";\n}\n"); if (v3Global.opt.trace()) { puts("std::unique_ptr<VerilatedTraceConfig> " + topClassName() diff --git a/src/V3EmitCSyms.cpp b/src/V3EmitCSyms.cpp index 04ea788146..0ae5d82fde 100644 --- a/src/V3EmitCSyms.cpp +++ b/src/V3EmitCSyms.cpp @@ -468,7 +468,7 @@ void EmitCSyms::emitSymHdr() { if (v3Global.opt.mtasks()) { puts("\n// MULTI-THREADING\n"); - puts("VlThreadPool* const __Vm_threadPoolp;\n"); + puts("VlThreadPool* __Vm_threadPoolp;\n"); puts("bool __Vm_even_cycle__ico = false;\n"); puts("bool __Vm_even_cycle__act = false;\n"); puts("bool __Vm_even_cycle__nba = false;\n"); diff --git a/test_regress/t/t_flag_csplit.pl b/test_regress/t/t_flag_csplit.pl index 1eb2c8dd4a..bcb3f9eef2 100755 --- a/test_regress/t/t_flag_csplit.pl +++ b/test_regress/t/t_flag_csplit.pl @@ -97,6 +97,8 @@ sub check_cpp { && $func !~ /::traceInit$/ && $func !~ /::traceFull$/ && $func !~ /::final$/ + && $func !~ /::prepareClone$/ + && $func !~ /::atClone$/ ) { push @funcs, $func; } diff --git a/test_regress/t/t_wrapper_clone.cpp b/test_regress/t/t_wrapper_clone.cpp new file mode 100644 index 0000000000..86fbfe1cc1 --- /dev/null +++ b/test_regress/t/t_wrapper_clone.cpp @@ -0,0 +1,89 @@ +// +// DESCRIPTION: Verilator: Verilog Test module for prepareClone/atClone APIs +// +// This file ONLY is placed into the Public 
Domain, for any use, +// without warranty, 2023 by Yinan Xu. +// SPDX-License-Identifier: CC0-1.0 + +#include + +#include + +#include + +// These require the above. Comment prevents clang-format moving them +#include "TestCheck.h" + +#include VM_PREFIX_INCLUDE + +double sc_time_stamp() { return 0; } + +// Note: Since the pthread_atfork API accepts only function pointers, +// we are using a static variable for the TOP just for a simple example. +// Without using the pthread_atfork API, the user can instead manually call +// prepareClone and atClone before and after calling fork, and topp can be +// allocated dynamically. +static VM_PREFIX* topp = nullptr; +static auto prepareClone = []() { topp->prepareClone(); }; +static auto atClone = []() { topp->atClone(); }; + +void single_cycle(VM_PREFIX* topp) { + topp->clock = 1; + topp->eval(); + + topp->clock = 0; + topp->eval(); +} + +int main(int argc, char** argv) { + // We disable the buffering for stdout in this test. + // Redirecting the stdout to files with buffering causes duplicated stdout + // outputs in both parent and child processes, even if they are actually + // called before the fork. + setvbuf(stdout, nullptr, _IONBF, 0); + + VerilatedContext* contextp = new VerilatedContext; + topp = new VM_PREFIX{contextp}; + + // To avoid resource leaks, prepareClone must be called before fork to + // free all the allocated resources. Though this would bring performance + // overhead to the parent process, we believe that fork should not be + // called frequently, and the overhead is minor compared to simulation. + pthread_atfork(prepareClone, atClone, atClone); + + // If you care about critical performance, prepareClone can be avoided, + // with atClone being called only at the child process, as follows. + // It has the same functionality as the previous one, but has memory leaks. + // According to the sanitizer, 288 bytes are leaked for one fork call. 
+ // pthread_atfork(nullptr, nullptr, atClone); + + topp->reset = 1; + topp->is_parent = 0; + for (int i = 0; i < 5; i++) { single_cycle(topp); } + + topp->reset = 0; + while (!contextp->gotFinish()) { + single_cycle(topp); + + if (topp->do_clone) { + const int pid = fork(); + if (pid < 0) { + printf("fork failed\n"); + } else if (pid == 0) { + printf("child: here we go\n"); + } else { + while (wait(nullptr) > 0) + ; + printf("parent: here we go\n"); + topp->is_parent = 1; + } + } + } + + topp->final(); + + VL_DO_DANGLING(delete topp, topp); + VL_DO_DANGLING(delete contextp, contextp); + + return 0; +} diff --git a/test_regress/t/t_wrapper_clone.out b/test_regress/t/t_wrapper_clone.out new file mode 100644 index 0000000000..1a5b106d11 --- /dev/null +++ b/test_regress/t/t_wrapper_clone.out @@ -0,0 +1,15 @@ +counter = 0 +counter = 1 +counter = 2 +counter = 3 +counter = 4 +counter = 5 +child: here we go +counter = 6 +counter = 7 +counter = 8 +parent: here we go +counter = 6 +counter = 7 +counter = 8 +*-* All Finished *-* diff --git a/test_regress/t/t_wrapper_clone.pl b/test_regress/t/t_wrapper_clone.pl new file mode 100755 index 0000000000..54682086be --- /dev/null +++ b/test_regress/t/t_wrapper_clone.pl @@ -0,0 +1,25 @@ +#!/usr/bin/env perl +if (!$::Driver) { use strict; use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; } +# DESCRIPTION: Verilator: Verilog Test module for prepareClone/atClone APIs +# +# This file ONLY is placed into the Public Domain, for any use, +# without warranty, 2023 by Yinan Xu. +# SPDX-License-Identifier: CC0-1.0 + +scenarios(vlt_all => 1); + +compile( + make_top_shell => 0, + make_main => 0, + verilator_flags2 => ["--exe $Self->{t_dir}/$Self->{name}.cpp", + "-cc"], + threads => $Self->{vltmt} ? 
2 : 1, + ); + +execute( + check_finished => 1, + expect_filename => $Self->{golden_filename}, + ); + +ok(1); +1; diff --git a/test_regress/t/t_wrapper_clone.v b/test_regress/t/t_wrapper_clone.v new file mode 100644 index 0000000000..fa6546a8b8 --- /dev/null +++ b/test_regress/t/t_wrapper_clone.v @@ -0,0 +1,38 @@ +// DESCRIPTION: Verilator: Verilog Test module for prepareClone/atClone APIs +// +// This model counts from 0 to 8. It forks a child process (in C++) at 6 +// and waits for the child to simulate and exit for resumption (of the parent). +// +// This file ONLY is placed into the Public Domain, for any use, +// without warranty, 2023 by Yinan Xu. +// SPDX-License-Identifier: CC0-1.0 + +module top( + input clock, + input reset, + input is_parent, + output do_clone +); + +reg [3:0] counter; + +assign do_clone = counter == 4'h6; + +always @(posedge clock) begin + if (reset) begin + counter <= 4'h0; + end + else begin + counter <= counter + 4'h1; + $write("counter = %d\n", counter); + end + + if (counter[3]) begin + if (is_parent) begin + $write("*-* All Finished *-*\n"); + end + $finish(0); + end +end + +endmodule