From 9cdb1cf8fb5a16f4eea5d60479e23068a27283a5 Mon Sep 17 00:00:00 2001
From: Benjamin Wang
Date: Fri, 19 Dec 2025 01:54:57 +0000
Subject: [PATCH] Add a new metric etcd_server_request_duration_seconds to track e2e duration for each request

Signed-off-by: Benjamin Wang
---
 server/etcdserver/metrics.go   | 14 ++++++
 server/etcdserver/v3_server.go | 85 +++++++++++++++++++++++++++++++++-
 tests/e2e/metrics_test.go      |  1 +
 3 files changed, 98 insertions(+), 2 deletions(-)

diff --git a/server/etcdserver/metrics.go b/server/etcdserver/metrics.go
index 7176d30adbc0..4d39afcf6fa8 100644
--- a/server/etcdserver/metrics.go
+++ b/server/etcdserver/metrics.go
@@ -107,6 +107,19 @@ var (
 		Name:      "read_indexes_failed_total",
 		Help:      "The total number of failed read indexes seen.",
 	})
+	requestDurationSec = prometheus.NewHistogramVec(
+		prometheus.HistogramOpts{
+			Namespace: "etcd",
+			Subsystem: "server",
+			Name:      "request_duration_seconds",
+			Help:      "Response latency distribution in seconds for each type.",
+
+			// lowest bucket start of upper bound 0.001 sec (1 ms) with factor 2
+			// highest bucket start of 0.001 sec * 2^13 == 8.192 sec
+			Buckets: prometheus.ExponentialBuckets(0.001, 2, 14),
+		},
+		[]string{"type", "success"},
+	)
 	leaseExpired = prometheus.NewCounter(prometheus.CounterOpts{
 		Namespace: "etcd_debugging",
 		Subsystem: "server",
@@ -173,6 +186,7 @@ func init() {
 	prometheus.MustRegister(proposalsFailed)
 	prometheus.MustRegister(slowReadIndex)
 	prometheus.MustRegister(readIndexFailed)
+	prometheus.MustRegister(requestDurationSec)
 	prometheus.MustRegister(leaseExpired)
 	prometheus.MustRegister(currentVersion)
 	prometheus.MustRegister(currentGoVersion)
diff --git a/server/etcdserver/v3_server.go b/server/etcdserver/v3_server.go
index 83366a9e8c8c..4c65f91d0008 100644
--- a/server/etcdserver/v3_server.go
+++ b/server/etcdserver/v3_server.go
@@ -131,6 +131,8 @@ func (s *EtcdServer) Range(ctx context.Context, r *pb.RangeRequest) (*pb.RangeRe
 			)
 		}
 		trace.LogIfLong(traceThreshold)
+		success := err == nil
+		requestDurationSec.WithLabelValues("Range", strconv.FormatBool(success)).Observe(time.Since(start).Seconds())
 	}(time.Now())
 
 	if !r.Serializable {
@@ -218,6 +220,8 @@ func (s *EtcdServer) Txn(ctx context.Context, r *pb.TxnRequest) (*pb.TxnResponse
 		defer func(start time.Time) {
 			txn.WarnOfExpensiveReadOnlyTxnRequest(s.Logger(), s.Cfg.WarningApplyDuration, start, r, resp, err)
 			trace.LogIfLong(traceThreshold)
+			success := err == nil
+			requestDurationSec.WithLabelValues("ReadonlyTxn", strconv.FormatBool(success)).Observe(time.Since(start).Seconds())
 		}(time.Now())
 
 		get := func() {
@@ -837,7 +841,18 @@ func (s *EtcdServer) processInternalRaftRequestOnce(ctx context.Context, r pb.In
 		}
 	}
 
-	data, err := r.Marshal()
+	var (
+		data    []byte
+		err     error
+		start   = time.Now()
+		reqType = getRequestType(&r)
+	)
+	defer func() {
+		success := err == nil
+		requestDurationSec.WithLabelValues(reqType, strconv.FormatBool(success)).Observe(time.Since(start).Seconds())
+	}()
+
+	data, err = r.Marshal()
 	if err != nil {
 		return nil, err
 	}
@@ -855,7 +870,6 @@ func (s *EtcdServer) processInternalRaftRequestOnce(ctx context.Context, r pb.In
 	cctx, cancel := context.WithTimeout(ctx, s.Cfg.ReqTimeout())
 	defer cancel()
 
-	start := time.Now()
 	span := trace.SpanFromContext(ctx)
 	span.AddEvent("Send raft proposal")
 	err = s.r.Propose(cctx, data)
@@ -880,6 +894,73 @@ func (s *EtcdServer) processInternalRaftRequestOnce(ctx context.Context, r pb.In
 	}
 }
 
+func getRequestType(r *pb.InternalRaftRequest) string {
+	switch {
+	case r.Range != nil:
+		return "Range"
+	case r.Put != nil:
+		return "Put"
+	case r.DeleteRange != nil:
+		return "DeleteRange"
+	case r.Txn != nil:
+		return "Txn"
+	case r.Compaction != nil:
+		return "Compaction"
+	case r.LeaseGrant != nil:
+		return "LeaseGrant"
+	case r.LeaseRevoke != nil:
+		return "LeaseRevoke"
+	case r.LeaseCheckpoint != nil:
+		return "LeaseCheckpoint"
+	case r.Alarm != nil:
+		return "Alarm"
+	case r.Authenticate != nil:
+		return "Authenticate"
+	case r.AuthEnable != nil:
+		return "AuthEnable"
+	case r.AuthDisable != nil:
+		return "AuthDisable"
+	case r.AuthStatus != nil:
+		return "AuthStatus"
+	case r.AuthUserAdd != nil:
+		return "AuthUserAdd"
+	case r.AuthUserDelete != nil:
+		return "AuthUserDelete"
+	case r.AuthUserChangePassword != nil:
+		return "AuthUserChangePassword"
+	case r.AuthUserGrantRole != nil:
+		return "AuthUserGrantRole"
+	case r.AuthUserGet != nil:
+		return "AuthUserGet"
+	case r.AuthUserRevokeRole != nil:
+		return "AuthUserRevokeRole"
+	case r.AuthRoleAdd != nil:
+		return "AuthRoleAdd"
+	case r.AuthRoleGrantPermission != nil:
+		return "AuthRoleGrantPermission"
+	case r.AuthRoleGet != nil:
+		return "AuthRoleGet"
+	case r.AuthRoleRevokePermission != nil:
+		return "AuthRoleRevokePermission"
+	case r.AuthRoleDelete != nil:
+		return "AuthRoleDelete"
+	case r.AuthUserList != nil:
+		return "AuthUserList"
+	case r.AuthRoleList != nil:
+		return "AuthRoleList"
+	case r.ClusterVersionSet != nil:
+		return "ClusterVersionSet"
+	case r.ClusterMemberAttrSet != nil:
+		return "ClusterMemberAttrSet"
+	case r.DowngradeInfoSet != nil:
+		return "DowngradeInfoSet"
+	case r.DowngradeVersionTest != nil:
+		return "DowngradeVersionTest"
+	default:
+		return "Unknown"
+	}
+}
+
 // Watchable returns a watchable interface attached to the etcdserver.
 func (s *EtcdServer) Watchable() mvcc.WatchableKV { return s.KV() }
 
diff --git a/tests/e2e/metrics_test.go b/tests/e2e/metrics_test.go
index 976be635fea4..6e3774d1cf26 100644
--- a/tests/e2e/metrics_test.go
+++ b/tests/e2e/metrics_test.go
@@ -213,6 +213,7 @@ func TestNoMetricsMissing(t *testing.T) {
 			"etcd_server_quota_backend_bytes",
 			"etcd_server_range_duration_seconds",
 			"etcd_server_read_indexes_failed_total",
+			"etcd_server_request_duration_seconds",
 			"etcd_server_slow_apply_total",
 			"etcd_server_slow_read_indexes_total",
 			"etcd_server_snapshot_apply_in_progress_total",