Skip to content

Commit 0b00ba0

Browse files
EmergencyReparentShard: wait only for majority of most advanced relay logs
Signed-off-by: Tim Vaillancourt <[email protected]>
1 parent d9380c1 commit 0b00ba0

File tree

5 files changed

+108
-9
lines changed

5 files changed

+108
-9
lines changed

go/mysql/replication/replication_position.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,22 @@ func (rp Position) IsZero() bool {
9797
return rp.GTIDSet == nil
9898
}
9999

100+
// ComparePositions compares two Positions, returning:
101+
// 0 if both a anb b are equal positions.
102+
// 1 if a is > than b.
103+
// -1 if a is < than b.
104+
// This can be used as a sort function via
105+
// slices.SortFunc and slices.SortFuncStable.
106+
func ComparePositions(a, b Position) int {
107+
if a.Equal(b) {
108+
return 0
109+
}
110+
if a.AtLeast(b) {
111+
return -1
112+
}
113+
return 1
114+
}
115+
100116
// AppendGTID returns a new Position that represents the position
101117
// after the given GTID is replicated.
102118
func AppendGTID(rp Position, gtid GTID) Position {

go/mysql/replication/replication_position_test.go

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,12 @@ package replication
1818

1919
import (
2020
"encoding/json"
21+
"slices"
2122
"strings"
2223
"testing"
2324

2425
"github.com/stretchr/testify/assert"
26+
"github.com/stretchr/testify/require"
2527
)
2628

2729
func TestPositionEqual(t *testing.T) {
@@ -407,3 +409,32 @@ func TestJsonUnmarshalPositionZero(t *testing.T) {
407409
assert.True(t, got.Equal(want), "json.Unmarshal(%#v) = %#v, want %#v", input, got, want)
408410

409411
}
412+
413+
func TestComparePositionsSortStable(t *testing.T) {
414+
sid, _ := ParseSID("3e11fa47-71ca-11e1-9e33-c80aa9429562")
415+
positions := []Position{
416+
{GTIDSet: Mysql56GTIDSet{sid: []interval{{start: 1, end: 5}}}},
417+
{GTIDSet: Mysql56GTIDSet{sid: []interval{{start: 1, end: 5}}}},
418+
{GTIDSet: Mysql56GTIDSet{sid: []interval{{start: 1, end: 6}}}},
419+
{GTIDSet: Mysql56GTIDSet{sid: []interval{{start: 1, end: 2}}}},
420+
{GTIDSet: Mysql56GTIDSet{sid: []interval{{start: 1, end: 7}}}},
421+
{GTIDSet: Mysql56GTIDSet{sid: []interval{{start: 1, end: 6}}}},
422+
}
423+
424+
wantedStrings := []string{
425+
"3e11fa47-71ca-11e1-9e33-c80aa9429562:1-7",
426+
"3e11fa47-71ca-11e1-9e33-c80aa9429562:1-6",
427+
"3e11fa47-71ca-11e1-9e33-c80aa9429562:1-6",
428+
"3e11fa47-71ca-11e1-9e33-c80aa9429562:1-5",
429+
"3e11fa47-71ca-11e1-9e33-c80aa9429562:1-5",
430+
"3e11fa47-71ca-11e1-9e33-c80aa9429562:1-2",
431+
}
432+
433+
slices.SortStableFunc(positions, func(a, b Position) int {
434+
return ComparePositions(a, b)
435+
})
436+
437+
for i, wanted := range wantedStrings {
438+
require.Equal(t, wanted, positions[i].String())
439+
}
440+
}

go/vt/vtctl/reparentutil/emergency_reparenter.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,7 @@ func (erp *EmergencyReparenter) reparentShardLocked(ctx context.Context, ev *eve
222222
return err
223223
}
224224
// Restrict the valid candidates list. We remove any tablet which is of the type DRAINED, RESTORE or BACKUP.
225+
// The remaining candidates are reduced to a majority with the most advanced relay log GTIDs.
225226
validCandidates, err = restrictValidCandidates(validCandidates, tabletMap)
226227
if err != nil {
227228
return err

go/vt/vtctl/reparentutil/util.go

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ package reparentutil
1919
import (
2020
"context"
2121
"fmt"
22+
"math"
23+
"slices"
2224
"strings"
2325
"sync"
2426
"time"
@@ -313,9 +315,23 @@ func getValidCandidatesAndPositionsAsList(validCandidates map[string]replication
313315
return validTablets, tabletPositions, nil
314316
}
315317

318+
// getValidCandidatesMajorityCount returns a number equal to a majority of candidates. If
319+
// there are fewer than 3 candidates, all provided candidates are the majority.
320+
func getValidCandidatesMajorityCount(validCandidates map[string]replication.Position) int {
321+
totalCandidates := len(validCandidates)
322+
if totalCandidates == 0 {
323+
return 0
324+
}
325+
if totalCandidates < 3 {
326+
return totalCandidates
327+
}
328+
return int(math.Floor(float64(totalCandidates)/2) + 1)
329+
}
330+
316331
// restrictValidCandidates is used to restrict some candidates from being considered eligible for becoming the intermediate source or the final promotion candidate
317332
func restrictValidCandidates(validCandidates map[string]replication.Position, tabletMap map[string]*topo.TabletInfo) (map[string]replication.Position, error) {
318333
restrictedValidCandidates := make(map[string]replication.Position)
334+
validPositionsSlice := make([]replication.Position, 0, len(validCandidates))
319335
for candidate, position := range validCandidates {
320336
candidateInfo, ok := tabletMap[candidate]
321337
if !ok {
@@ -326,6 +342,23 @@ func restrictValidCandidates(validCandidates map[string]replication.Position, ta
326342
continue
327343
}
328344
restrictedValidCandidates[candidate] = position
345+
validPositionsSlice = append(validPositionsSlice, position)
346+
}
347+
348+
// sort by replication positions with greatest GTID set first, then remove replicas
349+
// that are not part of the majority of the most-advanced replicas.
350+
slices.SortStableFunc(validPositionsSlice, func(a, b replication.Position) int {
351+
return replication.ComparePositions(a, b)
352+
})
353+
majorityCandidatesCount := getValidCandidatesMajorityCount(restrictedValidCandidates)
354+
validPositionsSlice = validPositionsSlice[:majorityCandidatesCount]
355+
for tabletAlias, position := range restrictedValidCandidates {
356+
if slices.ContainsFunc(validPositionsSlice, func(rp replication.Position) bool {
357+
return position.Equal(rp)
358+
}) {
359+
continue
360+
}
361+
delete(restrictedValidCandidates, tabletAlias)
329362
}
330363
return restrictedValidCandidates, nil
331364
}

go/vt/vtctl/reparentutil/util_test.go

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1722,6 +1722,10 @@ func TestWaitForCatchUp(t *testing.T) {
17221722
}
17231723

17241724
func TestRestrictValidCandidates(t *testing.T) {
1725+
gtidSet1, _ := replication.ParseMysql56GTIDSet("3e11fa47-71ca-11e1-9e33-c80aa9429562:1-6")
1726+
gtidSet2, _ := replication.ParseMysql56GTIDSet("3e11fa47-71ca-11e1-9e33-c80aa9429562:1-5")
1727+
gtidSet3, _ := replication.ParseMysql56GTIDSet("3e11fa47-71ca-11e1-9e33-c80aa9429562:1-3")
1728+
gtidSet4, _ := replication.ParseMysql56GTIDSet("3e11fa47-71ca-11e1-9e33-c80aa9429562:1-2")
17251729
tests := []struct {
17261730
name string
17271731
validCandidates map[string]replication.Position
@@ -1731,12 +1735,12 @@ func TestRestrictValidCandidates(t *testing.T) {
17311735
{
17321736
name: "remove invalid tablets",
17331737
validCandidates: map[string]replication.Position{
1734-
"zone1-0000000100": {},
1735-
"zone1-0000000101": {},
1736-
"zone1-0000000102": {},
1737-
"zone1-0000000103": {},
1738-
"zone1-0000000104": {},
1739-
"zone1-0000000105": {},
1738+
"zone1-0000000100": {GTIDSet: gtidSet1},
1739+
"zone1-0000000101": {GTIDSet: gtidSet2},
1740+
"zone1-0000000102": {GTIDSet: gtidSet2},
1741+
"zone1-0000000103": {GTIDSet: gtidSet3},
1742+
"zone1-0000000104": {GTIDSet: gtidSet3},
1743+
"zone1-0000000105": {GTIDSet: gtidSet4},
17401744
},
17411745
tabletMap: map[string]*topo.TabletInfo{
17421746
"zone1-0000000100": {
@@ -1795,9 +1799,8 @@ func TestRestrictValidCandidates(t *testing.T) {
17951799
},
17961800
},
17971801
result: map[string]replication.Position{
1798-
"zone1-0000000100": {},
1799-
"zone1-0000000101": {},
1800-
"zone1-0000000104": {},
1802+
"zone1-0000000100": {GTIDSet: gtidSet1},
1803+
"zone1-0000000101": {GTIDSet: gtidSet2},
18011804
},
18021805
},
18031806
}
@@ -2074,3 +2077,18 @@ func TestGetBackupCandidates(t *testing.T) {
20742077
})
20752078
}
20762079
}
2080+
2081+
func TestGetValidCandidatesMajorityCount(t *testing.T) {
2082+
buildCandidatesFunc := func(length int) map[string]replication.Position {
2083+
candidates := make(map[string]replication.Position, length)
2084+
for i := 1; i <= length; i++ {
2085+
candidates[fmt.Sprintf("candidate-%d", i)] = replication.Position{}
2086+
}
2087+
return candidates
2088+
}
2089+
require.Equal(t, 1, getValidCandidatesMajorityCount(buildCandidatesFunc(1)))
2090+
require.Equal(t, 2, getValidCandidatesMajorityCount(buildCandidatesFunc(2)))
2091+
require.Equal(t, 2, getValidCandidatesMajorityCount(buildCandidatesFunc(3)))
2092+
require.Equal(t, 3, getValidCandidatesMajorityCount(buildCandidatesFunc(5)))
2093+
require.Equal(t, 5, getValidCandidatesMajorityCount(buildCandidatesFunc(9)))
2094+
}

0 commit comments

Comments
 (0)