Skip to content

Commit 85f972c

Browse files
authored
Merge pull request #84 from tgockel/issue/82/server-spin
server: Do not spin in `server::run_process`.
2 parents 92470a0 + 1ad97e6 commit 85f972c

File tree

6 files changed

+201
-25
lines changed

6 files changed

+201
-25
lines changed

src/zk/server/detail/event_handle.cpp

+49
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
#include "event_handle.hpp"
2+
#include "close.hpp"
3+
4+
#include <cerrno>
5+
#include <cstdint>
6+
#include <system_error>
7+
8+
#include <sys/eventfd.h>
9+
#include <unistd.h>
10+
11+
namespace zk::server::detail
12+
{
13+
14+
event_handle::event_handle() :
15+
_fd(::eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK))
16+
{ }
17+
18+
event_handle::~event_handle() noexcept
19+
{
20+
close();
21+
}
22+
23+
void event_handle::close() noexcept
24+
{
25+
if (_fd != -1)
26+
{
27+
detail::close(_fd);
28+
_fd = -1;
29+
}
30+
}
31+
32+
void event_handle::notify_one()
33+
{
34+
std::uint64_t x = 1;
35+
if (::write(_fd, &x, sizeof x) == -1 && errno != EAGAIN)
36+
throw std::system_error(errno, std::system_category(), "event_handle::notify_one()");
37+
}
38+
39+
bool event_handle::try_wait()
40+
{
41+
std::uint64_t burn;
42+
if (::read(_fd, &burn, sizeof burn) == -1)
43+
return errno == EAGAIN ? false
44+
: throw std::system_error(errno, std::system_category(), "event_handle::try_wait()");
45+
else
46+
return true;
47+
}
48+
49+
}

src/zk/server/detail/event_handle.hpp

+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
#pragma once
2+
3+
#include <zk/config.hpp>
4+
5+
namespace zk::server::detail
6+
{
7+
8+
class event_handle final
9+
{
10+
public:
11+
using native_handle_type = int;
12+
13+
public:
14+
explicit event_handle();
15+
16+
event_handle(const event_handle&) = delete;
17+
18+
event_handle& operator=(const event_handle&) = delete;
19+
20+
~event_handle() noexcept;
21+
22+
/// Close this event for future signalling. This is automatically called from the destructor.
23+
void close() noexcept;
24+
25+
/// Signal this handle that something has happened.
26+
void notify_one();
27+
28+
/// Attempt to wait for this handle to be signalled, but do not block.
29+
///
30+
/// \returns \c true if we successfully waited for a signal (and consumed it); \c false if this handle was not
31+
/// signalled.
32+
bool try_wait();
33+
34+
/// Get the file descriptor backing this handle. This is generally only used when interacting with the kernel and
35+
/// should be avoided in regular use.
36+
native_handle_type native_handle() { return _fd; }
37+
38+
private:
39+
native_handle_type _fd;
40+
};
41+
42+
}

src/zk/server/detail/subprocess.cpp

+26-8
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
#include <algorithm>
44
#include <iterator>
5+
#include <numeric>
56
#include <ostream>
67
#include <system_error>
78

@@ -90,27 +91,44 @@ subprocess::subprocess(std::string program_name, argument_list args) :
9091

9192
subprocess::~subprocess() noexcept
9293
{
93-
auto old_sig_handler = ::signal(SIGALRM, [](int) {});
94+
terminate();
95+
}
96+
97+
void subprocess::terminate(duration_type time_to_abort) noexcept
98+
{
99+
auto alarm_time = [&] () -> unsigned int
100+
{
101+
if (time_to_abort.count() <= 0)
102+
return 1U;
103+
else if (time_to_abort.count() > 300)
104+
return 300U;
105+
else
106+
return static_cast<unsigned int>(time_to_abort.count());
107+
}();
108+
94109
for (unsigned attempt = 1U; _proc_id != -1; ++attempt)
95110
{
96-
signal(attempt == 1U ? SIGTERM : SIGABRT, true /* terminate the whole process group */);
111+
auto old_sig_handler = ::signal(SIGALRM, [](int) { });
112+
signal(attempt == 1U ? SIGTERM : SIGABRT);
97113

98114
int rc;
99-
::alarm(1);
115+
::alarm(alarm_time);
100116
if (::waitpid(_proc_id, &rc, 0) > 0)
117+
{
101118
_proc_id = -1;
102-
}
119+
}
103120

104-
::alarm(0);
105-
::signal(SIGALRM, old_sig_handler);
121+
::alarm(0);
122+
::signal(SIGALRM, old_sig_handler);
123+
}
106124
}
107125

108-
bool subprocess::signal(int sig_val, bool whole_group)
126+
bool subprocess::signal(int sig_val)
109127
{
110128
if (_proc_id == -1)
111129
return false;
112130

113-
pid_t pid = whole_group ? -_proc_id : _proc_id;
131+
pid_t pid = _proc_id;
114132

115133
int rc = ::kill(pid, sig_val);
116134
if (rc == -1 && errno == ESRCH)

src/zk/server/detail/subprocess.hpp

+13-9
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
#include <zk/config.hpp>
44

5+
#include <chrono>
56
#include <cstddef>
67
#include <iosfwd>
78
#include <string>
@@ -12,12 +13,13 @@
1213
namespace zk::server::detail
1314
{
1415

15-
/** Represents an owned subprocess. **/
16+
/// Represents an owned subprocess.
1617
class subprocess
1718
{
1819
public:
19-
using handle = int;
20+
using handle = int;
2021
using argument_list = std::vector<std::string>;
22+
using duration_type = std::chrono::seconds;
2123

2224
public:
2325
explicit subprocess(std::string program_name, argument_list args = argument_list());
@@ -27,18 +29,20 @@ class subprocess
2729

2830
~subprocess() noexcept;
2931

30-
/** Send a signal to this subprocess.
31-
*
32-
* \param whole_group Should the entire process group be signalled or just the root process?
33-
* \returns \c true if the signal likely reached the subprocess; \c false if it might not have (this can happen if
34-
* the subprocess has already terminated).
35-
**/
36-
bool signal(int sig_val, bool whole_group = false);
32+
/// Send a signal to this subprocess.
33+
///
34+
/// \returns \c true if the signal likely reached the subprocess; \c false if it might not have (this can happen if
35+
/// the subprocess has already terminated).
36+
bool signal(int sig_val);
3737

3838
pipe& stdin() noexcept { return _stdin; }
3939
pipe& stdout() noexcept { return _stdout; }
4040
pipe& stderr() noexcept { return _stderr; }
4141

42+
/// Terminate the process if it is still running. In the first attempt to terminate, \c SIGTERM is used. If the
43+
/// process has not terminated before \a time_to_abort has passed, the process is signalled again with \c SIGABRT.
44+
void terminate(duration_type time_to_abort = std::chrono::seconds(1U)) noexcept;
45+
4246
private:
4347
std::string _program_name;
4448
handle _proc_id;

src/zk/server/server.cpp

+61-6
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,17 @@
22

33
#include <zk/future.hpp>
44

5+
#include <cerrno>
56
#include <exception>
67
#include <iostream>
8+
#include <system_error>
79

810
#include <signal.h>
11+
#include <sys/select.h>
912

1013
#include "classpath.hpp"
1114
#include "configuration.hpp"
15+
#include "detail/event_handle.hpp"
1216
#include "detail/subprocess.hpp"
1317

1418
namespace zk::server
@@ -27,7 +31,8 @@ static void validate_settings(const configuration& settings)
2731
}
2832

2933
server::server(classpath packages, configuration settings) :
30-
_running(true)
34+
_running(true),
35+
_shutdown_event(std::make_unique<detail::event_handle>())
3136
{
3237
validate_settings(settings);
3338
_worker = std::thread([this, packages = std::move(packages), settings = std::move(settings)] ()
@@ -48,11 +53,33 @@ server::~server() noexcept
4853

4954
void server::shutdown(bool wait_for_stop)
5055
{
51-
_running = false;
56+
_running.store(false, std::memory_order_release);
57+
_shutdown_event->notify_one();
58+
5259
if (wait_for_stop && _worker.joinable())
5360
_worker.join();
5461
}
5562

63+
static void wait_for_event(int fd1, int fd2, int fd3)
64+
{
65+
// This could be implemented with epoll instead of select, but since N=3, it doesn't really matter
66+
::fd_set read_fds;
67+
FD_ZERO(&read_fds);
68+
FD_SET(fd1, &read_fds);
69+
FD_SET(fd2, &read_fds);
70+
FD_SET(fd3, &read_fds);
71+
72+
int nfds = std::max(std::max(fd1, fd2), fd3) + 1;
73+
int rc = ::select(nfds, &read_fds, nullptr, nullptr, nullptr);
74+
if (rc < 0)
75+
{
76+
if (errno == EINTR)
77+
return;
78+
else
79+
throw std::system_error(errno, std::system_category(), "select");
80+
}
81+
}
82+
5683
void server::run_process(const classpath& packages, const configuration& settings)
5784
{
5885
detail::subprocess::argument_list args = { "-cp", packages.command_line(),
@@ -70,12 +97,40 @@ void server::run_process(const classpath& packages, const configuration& setting
7097

7198
detail::subprocess proc("java", std::move(args));
7299

73-
while (_running.load(std::memory_order_relaxed))
100+
auto drain_pipes = [&] ()
101+
{
102+
bool read_anything = true;
103+
while (read_anything)
104+
{
105+
read_anything = false;
106+
107+
auto out = proc.stdout().read();
108+
if (!out.empty())
109+
{
110+
read_anything = true;
111+
std::cout << out;
112+
}
113+
114+
auto err = proc.stderr().read();
115+
if (!err.empty())
116+
{
117+
read_anything = true;
118+
std::cerr << out;
119+
}
120+
}
121+
};
122+
123+
while (_running.load(std::memory_order_acquire))
74124
{
75-
std::cout << proc.stdout().read();
76-
std::cerr << proc.stderr().read();
125+
wait_for_event(proc.stdout().native_read_handle(),
126+
proc.stderr().native_read_handle(),
127+
_shutdown_event->native_handle()
128+
);
129+
130+
drain_pipes();
77131
}
78-
proc.signal(SIGTERM);
132+
proc.terminate();
133+
drain_pipes();
79134
}
80135

81136
}

src/zk/server/server.hpp

+10-2
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,13 @@
1111
namespace zk::server
1212
{
1313

14+
namespace detail
15+
{
16+
17+
class event_handle;
18+
19+
}
20+
1421
/// \defgroup Server
1522
/// Control a ZooKeeper \ref server process.
1623
/// \{
@@ -53,8 +60,9 @@ class server final
5360
void run_process(const classpath&, const configuration&);
5461

5562
private:
56-
std::atomic<bool> _running;
57-
std::thread _worker;
63+
std::atomic<bool> _running;
64+
std::unique_ptr<detail::event_handle> _shutdown_event;
65+
std::thread _worker;
5866

5967
// NOTE: The configuration is NOT stored in the server object. This is because configuration can be changed by the
6068
// ZK process in cases like ensemble reconfiguration. It is the job of run_process to deal with this.

0 commit comments

Comments
 (0)