Skip to content

Commit

Permalink
feat postgres: Add disabled_replicas to topology_settings
Browse files Browse the repository at this point in the history
Tests: testsuite
commit_hash:c9cb7f67d8fd15e5642dde89d8f53fe672e077e5
  • Loading branch information
antipovav committed Nov 26, 2024
1 parent 04e2150 commit cf6147f
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 6 deletions.
9 changes: 9 additions & 0 deletions postgresql/include/userver/storages/postgres/options.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <userver/congestion_control/controllers/linear.hpp>
#include <userver/storages/postgres/postgres_fwd.hpp>
#include <userver/utils/impl/transparent_hash.hpp>
#include <userver/utils/str_icase.hpp>

USERVER_NAMESPACE_BEGIN

Expand Down Expand Up @@ -150,8 +151,16 @@ inline constexpr std::size_t kDefaultPoolMaxQueueSize = 200;
/// Default limit for concurrent establishing connections number
inline constexpr std::size_t kDefaultConnectingLimit = 0;

/// @brief PostgreSQL topology options
///
/// Dynamic option @ref POSTGRES_TOPOLOGY_SETTINGS
struct TopologySettings {
/// Maximum allowed replication lag. A replica whose lag exceeds this value is
/// automatically disabled. A value of 0 turns the lag check off; negative
/// values are rejected when the dynamic config is parsed.
std::chrono::milliseconds max_replication_lag{kDefaultMaxReplicationLag};

/// List of manually disabled replicas (FQDNs).
///
/// Entries are compared with the host part of each replica's DSN.
/// NOTE(review): the set is keyed with StrIcaseHash/StrIcaseEqual, so the
/// comparison is presumably case-insensitive -- confirm against
/// userver/utils/str_icase.hpp.
std::unordered_set<std::string, USERVER_NAMESPACE::utils::StrIcaseHash, USERVER_NAMESPACE::utils::StrIcaseEqual>
disabled_replicas{};
};

/// @brief PostgreSQL connection pool options
Expand Down
17 changes: 12 additions & 5 deletions postgresql/src/storages/postgres/detail/topology/hot_standby.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ size_t ParseSize(std::string_view token) {
} // namespace

struct HotStandby::HostState {
explicit HostState(const Dsn& dsn) : app_name{EscapeHostName(OptionsFromDsn(dsn).host)} {}
explicit HostState(const Dsn& dsn) : host_name{OptionsFromDsn(dsn).host}, app_name(EscapeHostName(host_name)) {}

~HostState() {
// close connections synchronously
Expand All @@ -117,7 +117,9 @@ struct HotStandby::HostState {

std::unique_ptr<Connection> connection;

const std::string host_port;
// Used to check against disabled_replicas
std::string host_name;

// In pg_stat_replication slaves' host names are escaped and the column is
// called `application_name`
std::string app_name;
Expand Down Expand Up @@ -225,11 +227,16 @@ void HotStandby::RunDiscovery() {
std::chrono::duration_cast<std::chrono::milliseconds>(slave_lag).count()
);

auto& max_replication_lag = GetTopologySettings().max_replication_lag;
if (max_replication_lag > std::chrono::milliseconds{0} && slave_lag > max_replication_lag) {
const auto& topology_settings = GetTopologySettings();
if (topology_settings.max_replication_lag > std::chrono::milliseconds{0} &&
slave_lag > topology_settings.max_replication_lag) {
// Demote lagged slave
LOG_INFO() << "Disabling slave " << slave.app_name << " due to replication lag of " << slave_lag.count()
<< " ms (max " << GetTopologySettings().max_replication_lag.count() << " ms)";
<< " ms (max " << topology_settings.max_replication_lag.count() << " ms)";
slave.role = ClusterHostType::kNone;
} else if (topology_settings.disabled_replicas.count(slave.host_name)) {
// Manually disable slave
LOG_INFO() << "Disabling slave " << slave.app_name << " due to manual setting in config";
slave.role = ClusterHostType::kNone;
} else if (master) {
// Check for sync slave
Expand Down
1 change: 1 addition & 0 deletions postgresql/src/storages/postgres/postgres_config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ TopologySettings Parse(const formats::json::Value& config, formats::parse::To<To

result.max_replication_lag =
config["max_replication_lag_ms"].template As<std::chrono::milliseconds>(result.max_replication_lag);
result.disabled_replicas = config["disabled_replicas"].template As<decltype(result.disabled_replicas)>({});

if (result.max_replication_lag < std::chrono::milliseconds{0})
throw InvalidConfig{"max_replication_lag cannot be less than 0"};
Expand Down
8 changes: 7 additions & 1 deletion scripts/docs/en/schemas/dynamic_configs.md
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,11 @@ properties:
minimum: 0
description: maximum allowed replication lag. If it equals 0, no replication
lag checks are performed
disabled_replicas:
type: array
description: List of manually disabled replicas (FQDNs).
items:
type: string
required:
- max_replication_lag_ms
```
Expand All @@ -475,7 +480,8 @@ required:
```json
{
"__default__": {
"max_replication_lag_ms": 60000
"max_replication_lag_ms": 60000,
"disabled_replicas": ["replica-01.example.com", "replica-02.example.com"]
}
}
```
Expand Down

0 comments on commit cf6147f

Please sign in to comment.