From 74382f49a401394bf3b8c069a14304cc11e93b2d Mon Sep 17 00:00:00 2001 From: Wen Xu Date: Thu, 21 Dec 2023 19:55:50 +0000 Subject: [PATCH 1/2] add ring_members_by_zone metric if ring is zone aware Signed-off-by: Wen Xu --- pkg/ring/ring.go | 57 ++++++++++++++++++++++++++++++++++++------- pkg/ring/ring_test.go | 41 +++++++++++++++++++++++++++---- 2 files changed, 84 insertions(+), 14 deletions(-) diff --git a/pkg/ring/ring.go b/pkg/ring/ring.go index b231014ed7..2a202a38e9 100644 --- a/pkg/ring/ring.go +++ b/pkg/ring/ring.go @@ -193,12 +193,13 @@ type Ring struct { // If set to nil, no caching is done (used by tests, and subrings). shuffledSubringCache map[subringCacheKey]*Ring - memberOwnershipGaugeVec *prometheus.GaugeVec - numMembersGaugeVec *prometheus.GaugeVec - totalTokensGauge prometheus.Gauge - numTokensGaugeVec *prometheus.GaugeVec - oldestTimestampGaugeVec *prometheus.GaugeVec - reportedOwners map[string]struct{} + memberOwnershipGaugeVec *prometheus.GaugeVec + numMembersGaugeVec *prometheus.GaugeVec + numMembersByZoneGaugeVec *prometheus.GaugeVec + totalTokensGauge prometheus.Gauge + numTokensGaugeVec *prometheus.GaugeVec + oldestTimestampGaugeVec *prometheus.GaugeVec + reportedOwners map[string]struct{} logger log.Logger } @@ -249,6 +250,11 @@ func NewWithStoreClientAndStrategy(cfg Config, name, key string, store kv.Client Help: "Number of members in the ring", ConstLabels: map[string]string{"name": name}}, []string{"state"}), + numMembersByZoneGaugeVec: promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{ + Name: "ring_members_by_zone", + Help: "Number of members in the ring by zone", + ConstLabels: map[string]string{"name": name}}, + []string{"zone", "state"}), totalTokensGauge: promauto.With(reg).NewGauge(prometheus.GaugeOpts{ Name: "ring_tokens_total", Help: "Number of tokens in the ring", @@ -338,6 +344,7 @@ func (r *Ring) updateRingState(ringDesc *Desc) { r.mtx.Lock() defer r.mtx.Unlock() + prevRingZones := r.ringZones r.ringDesc = ringDesc r.ringTokens = ringTokens r.ringTokensByZone = ringTokensByZone @@ -348,6 +355,24 @@ func (r *Ring) updateRingState(ringDesc *Desc) { // Invalidate all cached subrings. r.shuffledSubringCache = make(map[subringCacheKey]*Ring) } + //cleaning up non-existent zone metrics + for _, prevZone := range prevRingZones { + zoneStillExists := false + for _, currZone := range r.ringZones { + if prevZone == currZone { + zoneStillExists = true + break + } + } + if !zoneStillExists { + //need to remove non existing zone + for _, s := range []string{unhealthy, ACTIVE.String(), LEAVING.String(), PENDING.String(), JOINING.String()} { + if ok := r.numMembersByZoneGaugeVec.DeleteLabelValues(prevZone, s); !ok { + level.Warn(r.logger).Log("msg", "failed to remove ring_members_by_zone metric for non existing zone", "zone", prevZone, "state", s) + } + } + } + } r.updateRingMetrics(rc) } @@ -604,12 +629,20 @@ func (r *Ring) updateRingMetrics(compareResult CompareResult) { } numByState := map[string]int{} + numByZoneAndState := map[string]map[string]int{} oldestTimestampByState := map[string]int64{} // Initialized to zero so we emit zero-metrics (instead of not emitting anything) - for _, s := range []string{unhealthy, ACTIVE.String(), LEAVING.String(), PENDING.String(), JOINING.String()} { - numByState[s] = 0 - oldestTimestampByState[s] = 0 + zones := r.ringZones + for _, zone := range zones { + if _, ok := numByZoneAndState[zone]; !ok { + numByZoneAndState[zone] = make(map[string]int) + } + for _, s := range []string{unhealthy, ACTIVE.String(), LEAVING.String(), PENDING.String(), JOINING.String()} { + numByState[s] = 0 + oldestTimestampByState[s] = 0 + numByZoneAndState[zone][s] = 0 + } } for _, instance := range r.ringDesc.Ingesters { @@ -618,6 +651,7 @@ func (r *Ring) updateRingMetrics(compareResult CompareResult) { s = unhealthy } numByState[s]++ + numByZoneAndState[instance.Zone][s]++ if oldestTimestampByState[s] == 0 || instance.Timestamp < oldestTimestampByState[s] { oldestTimestampByState[s] = instance.Timestamp } @@ -626,6 +660,11 @@ func (r *Ring) updateRingMetrics(compareResult CompareResult) { for state, count := range numByState { r.numMembersGaugeVec.WithLabelValues(state).Set(float64(count)) } + for zone, numByStateCount := range numByZoneAndState { + for state, count := range numByStateCount { + r.numMembersByZoneGaugeVec.WithLabelValues(zone, state).Set(float64(count)) + } + } for state, timestamp := range oldestTimestampByState { r.oldestTimestampGaugeVec.WithLabelValues(state).Set(float64(timestamp)) } diff --git a/pkg/ring/ring_test.go b/pkg/ring/ring_test.go index ca735f6c9e..6d9e411eeb 100644 --- a/pkg/ring/ring_test.go +++ b/pkg/ring/ring_test.go @@ -2962,8 +2962,8 @@ func TestUpdateMetrics(t *testing.T) { ringDesc := Desc{ Ingesters: map[string]InstanceDesc{ - "A": {Addr: "127.0.0.1", Timestamp: 22, Tokens: []uint32{math.MaxUint32 / 4, (math.MaxUint32 / 4) * 3}}, - "B": {Addr: "127.0.0.2", Timestamp: 11, Tokens: []uint32{(math.MaxUint32 / 4) * 2, math.MaxUint32}}, + "A": {Addr: "127.0.0.1", Zone: "us-west-2a", Timestamp: 22, Tokens: []uint32{math.MaxUint32 / 4, (math.MaxUint32 / 4) * 3}}, + "B": {Addr: "127.0.0.2", Zone: "us-west-2b", Timestamp: 11, Tokens: []uint32{(math.MaxUint32 / 4) * 2, math.MaxUint32}}, }, } ring.updateRingState(&ringDesc) @@ -2980,6 +2980,18 @@ func TestUpdateMetrics(t *testing.T) { ring_members{name="test",state="LEAVING"} 0 ring_members{name="test",state="PENDING"} 0 ring_members{name="test",state="Unhealthy"} 0 + # HELP ring_members_by_zone Number of members in the ring by zone + # TYPE ring_members_by_zone gauge + ring_members_by_zone{name="test",state="ACTIVE",zone="us-west-2a"} 1 + ring_members_by_zone{name="test",state="ACTIVE",zone="us-west-2b"} 1 + ring_members_by_zone{name="test",state="JOINING",zone="us-west-2a"} 0 + ring_members_by_zone{name="test",state="JOINING",zone="us-west-2b"} 0 + ring_members_by_zone{name="test",state="LEAVING",zone="us-west-2a"} 0 + ring_members_by_zone{name="test",state="LEAVING",zone="us-west-2b"} 0 + ring_members_by_zone{name="test",state="PENDING",zone="us-west-2a"} 0 + ring_members_by_zone{name="test",state="PENDING",zone="us-west-2b"} 0 + ring_members_by_zone{name="test",state="Unhealthy",zone="us-west-2a"} 0 + ring_members_by_zone{name="test",state="Unhealthy",zone="us-west-2b"} 0 # HELP ring_oldest_member_timestamp Timestamp of the oldest member in the ring. # TYPE ring_oldest_member_timestamp gauge ring_oldest_member_timestamp{name="test",state="ACTIVE"} 11 @@ -3014,8 +3026,8 @@ func TestUpdateMetricsWithRemoval(t *testing.T) { ringDesc := Desc{ Ingesters: map[string]InstanceDesc{ - "A": {Addr: "127.0.0.1", Timestamp: 22, Tokens: []uint32{math.MaxUint32 / 4, (math.MaxUint32 / 4) * 3}}, - "B": {Addr: "127.0.0.2", Timestamp: 11, Tokens: []uint32{(math.MaxUint32 / 4) * 2, math.MaxUint32}}, + "A": {Addr: "127.0.0.1", Zone: "us-west-2a", Timestamp: 22, Tokens: []uint32{math.MaxUint32 / 4, (math.MaxUint32 / 4) * 3}}, + "B": {Addr: "127.0.0.2", Zone: "us-west-2b", Timestamp: 11, Tokens: []uint32{(math.MaxUint32 / 4) * 2, math.MaxUint32}}, }, } ring.updateRingState(&ringDesc) @@ -3032,6 +3044,18 @@ func TestUpdateMetricsWithRemoval(t *testing.T) { ring_members{name="test",state="LEAVING"} 0 ring_members{name="test",state="PENDING"} 0 ring_members{name="test",state="Unhealthy"} 0 + # HELP ring_members_by_zone Number of members in the ring by zone + # TYPE ring_members_by_zone gauge + ring_members_by_zone{name="test",state="ACTIVE",zone="us-west-2a"} 1 + ring_members_by_zone{name="test",state="ACTIVE",zone="us-west-2b"} 1 + ring_members_by_zone{name="test",state="JOINING",zone="us-west-2a"} 0 + ring_members_by_zone{name="test",state="JOINING",zone="us-west-2b"} 0 + ring_members_by_zone{name="test",state="LEAVING",zone="us-west-2a"} 0 + ring_members_by_zone{name="test",state="LEAVING",zone="us-west-2b"} 0 + ring_members_by_zone{name="test",state="PENDING",zone="us-west-2a"} 0 + ring_members_by_zone{name="test",state="PENDING",zone="us-west-2b"} 0 + ring_members_by_zone{name="test",state="Unhealthy",zone="us-west-2a"} 0 + ring_members_by_zone{name="test",state="Unhealthy",zone="us-west-2b"} 0 # HELP ring_oldest_member_timestamp Timestamp of the oldest member in the ring. # TYPE ring_oldest_member_timestamp gauge ring_oldest_member_timestamp{name="test",state="ACTIVE"} 11 @@ -3051,7 +3075,7 @@ func TestUpdateMetricsWithRemoval(t *testing.T) { ringDescNew := Desc{ Ingesters: map[string]InstanceDesc{ - "A": {Addr: "127.0.0.1", Timestamp: 22, Tokens: []uint32{math.MaxUint32 / 4, (math.MaxUint32 / 4) * 3}}, + "A": {Addr: "127.0.0.1", Zone: "us-west-2a", Timestamp: 22, Tokens: []uint32{math.MaxUint32 / 4, (math.MaxUint32 / 4) * 3}}, }, } ring.updateRingState(&ringDescNew) @@ -3067,6 +3091,13 @@ func TestUpdateMetricsWithRemoval(t *testing.T) { ring_members{name="test",state="LEAVING"} 0 ring_members{name="test",state="PENDING"} 0 ring_members{name="test",state="Unhealthy"} 0 + # HELP ring_members_by_zone Number of members in the ring by zone + # TYPE ring_members_by_zone gauge + ring_members_by_zone{name="test",state="ACTIVE",zone="us-west-2a"} 1 + ring_members_by_zone{name="test",state="JOINING",zone="us-west-2a"} 0 + ring_members_by_zone{name="test",state="LEAVING",zone="us-west-2a"} 0 + ring_members_by_zone{name="test",state="PENDING",zone="us-west-2a"} 0 + ring_members_by_zone{name="test",state="Unhealthy",zone="us-west-2a"} 0 # HELP ring_oldest_member_timestamp Timestamp of the oldest member in the ring. # TYPE ring_oldest_member_timestamp gauge ring_oldest_member_timestamp{name="test",state="ACTIVE"} 22 From 4abe7f3566e682b2aaa3d3c178e79ca909d57153 Mon Sep 17 00:00:00 2001 From: Wen Xu Date: Thu, 21 Dec 2023 21:21:39 +0000 Subject: [PATCH 2/2] fix indentation Signed-off-by: Wen Xu --- pkg/ring/ring_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/ring/ring_test.go b/pkg/ring/ring_test.go index 6d9e411eeb..f124e88092 100644 --- a/pkg/ring/ring_test.go +++ b/pkg/ring/ring_test.go @@ -2982,7 +2982,7 @@ func TestUpdateMetrics(t *testing.T) { ring_members{name="test",state="Unhealthy"} 0 # HELP ring_members_by_zone Number of members in the ring by zone # TYPE ring_members_by_zone gauge - ring_members_by_zone{name="test",state="ACTIVE",zone="us-west-2a"} 1 + ring_members_by_zone{name="test",state="ACTIVE",zone="us-west-2a"} 1 ring_members_by_zone{name="test",state="ACTIVE",zone="us-west-2b"} 1 ring_members_by_zone{name="test",state="JOINING",zone="us-west-2a"} 0 ring_members_by_zone{name="test",state="JOINING",zone="us-west-2b"} 0 @@ -3046,7 +3046,7 @@ func TestUpdateMetricsWithRemoval(t *testing.T) { ring_members{name="test",state="Unhealthy"} 0 # HELP ring_members_by_zone Number of members in the ring by zone # TYPE ring_members_by_zone gauge - ring_members_by_zone{name="test",state="ACTIVE",zone="us-west-2a"} 1 + ring_members_by_zone{name="test",state="ACTIVE",zone="us-west-2a"} 1 ring_members_by_zone{name="test",state="ACTIVE",zone="us-west-2b"} 1 ring_members_by_zone{name="test",state="JOINING",zone="us-west-2a"} 0 ring_members_by_zone{name="test",state="JOINING",zone="us-west-2b"} 0 @@ -3093,7 +3093,7 @@ func TestUpdateMetricsWithRemoval(t *testing.T) { ring_members{name="test",state="Unhealthy"} 0 # HELP ring_members_by_zone Number of members in the ring by zone # TYPE ring_members_by_zone gauge - ring_members_by_zone{name="test",state="ACTIVE",zone="us-west-2a"} 1 + ring_members_by_zone{name="test",state="ACTIVE",zone="us-west-2a"} 1 ring_members_by_zone{name="test",state="JOINING",zone="us-west-2a"} 0 ring_members_by_zone{name="test",state="LEAVING",zone="us-west-2a"} 0 ring_members_by_zone{name="test",state="PENDING",zone="us-west-2a"} 0