@@ -24,7 +24,12 @@ import (
2424
2525 "github.com/stretchr/testify/require"
2626
27+ "go.etcd.io/bbolt"
2728 "go.etcd.io/etcd/api/v3/etcdserverpb"
29+ "go.etcd.io/etcd/client/pkg/v3/types"
30+ "go.etcd.io/etcd/server/v3/datadir"
31+ "go.etcd.io/etcd/server/v3/etcdserver/api/membership"
32+ "go.etcd.io/etcd/server/v3/mvcc/buckets"
2833 "go.etcd.io/etcd/tests/v3/framework/e2e"
2934)
3035
@@ -230,31 +235,140 @@ func ctlV3MemberUpdate(cx ctlCtx, memberID, peerURL string) error {
230235 return e2e .SpawnWithExpectWithEnv (cmdArgs , cx .envMap , " updated in cluster " )
231236}
232237
238+ // TestCtlV3PromotingLearner tests whether etcd can automatically fix the
239+ // issue caused by https://github.com/etcd-io/etcd/issues/19557.
233240func TestCtlV3PromotingLearner (t * testing.T ) {
234- e2e .BeforeTest (t )
241+ testCases := []struct {
242+ name string
243+ snapshotCount int
244+ promotionSuccess bool
245+ }{
246+ {
247+ name : "create snapshot after learner promotion and not saved to v3store" ,
248+ snapshotCount : 10 ,
249+ },
250+ {
251+ name : "no snapshot and learner promotion not saved to v3store" ,
252+ snapshotCount : 0 ,
253+ },
254+ {
255+ name : "no snapshot and learner promotion saved to v3store" ,
256+ snapshotCount : 0 ,
257+ promotionSuccess : true ,
258+ },
259+ }
260+
261+ for _ , tc := range testCases {
262+ t .Run (tc .name , func (t * testing.T ) {
263+
264+ t .Log ("Create a single node etcd cluster" )
265+ cfg := e2e .NewConfigNoTLS ()
266+ cfg .BasePeerScheme = "unix"
267+ cfg .ClusterSize = 1
268+ if tc .snapshotCount != 0 {
269+ cfg .SnapshotCount = tc .snapshotCount
270+ }
271+
272+ epc , err := e2e .NewEtcdProcessCluster (t , cfg )
273+ require .NoError (t , err , "failed to start etcd cluster: %v" , err )
274+ defer func () {
275+ derr := epc .Close ()
276+ require .NoError (t , derr , "failed to close etcd cluster: %v" , derr )
277+ }()
278+
279+ t .Log ("Add and start a learner" )
280+ learnerID , err := epc .StartNewProc (nil , true , t )
281+ require .NoError (t , err )
282+
283+ t .Log ("Write a key to ensure the cluster is healthy so far" )
284+ etcdctl := epc .Procs [0 ].Etcdctl (e2e .ClientNonTLS , false , false )
285+ err = etcdctl .Put ("foo" , "bar" )
286+ require .NoError (t , err )
287+
288+ t .Logf ("Promoting the learner %x" , learnerID )
289+ resp , err := etcdctl .MemberPromote (learnerID )
290+ require .NoError (t , err )
291+
292+ var promotedMember * etcdserverpb.Member
293+ for _ , m := range resp .Members {
294+ if m .ID == learnerID {
295+ promotedMember = m
296+ break
297+ }
298+ }
299+ require .NotNil (t , promotedMember )
300+ t .Logf ("The promoted member: %+v" , promotedMember )
301+
302+ t .Log ("Ensure all members are voting members" )
303+ ensureAllMembersAreVotingMembers (t , etcdctl )
304+
305+ if tc .snapshotCount != 0 {
306+ t .Logf ("Write %d keys to trigger a snapshot" , tc .snapshotCount )
307+ for i := 0 ; i < tc .snapshotCount ; i ++ {
308+ err = etcdctl .Put (fmt .Sprintf ("key_%d" , i ), fmt .Sprintf ("value_%d" , i ))
309+ require .NoError (t , err )
310+ }
311+ }
312+
313+ t .Logf ("Stopping the first member" )
314+ require .NoError (t , epc .Procs [0 ].Stop ())
315+
316+ if tc .promotionSuccess {
317+ t .Log ("Skip manually changing the already promoted learner to a learner" )
318+ } else {
319+ t .Log ("Manually changing the already promoted learner to a learner again" )
320+ promotedMember .IsLearner = true
321+ mustSaveMemberIntoBbolt (t , epc .Procs [0 ].Config ().DataDirPath , promotedMember )
322+ }
235323
236- t .Log ("Create a single node etcd cluster" )
237- cfg := e2e .NewConfigNoTLS ()
238- cfg .BasePeerScheme = "unix"
239- cfg .ClusterSize = 1
324+ t .Log ("Starting the first member again" )
325+ require .NoError (t , epc .Procs [0 ].Start ())
240326
241- epc , err := e2e .NewEtcdProcessCluster (t , cfg )
242- require .NoError (t , err , "failed to start etcd cluster: %v" , err )
327+ t .Log ("Checking the auto-sync learner log message" )
328+ e2e .AssertProcessLogs (t , epc .Procs [0 ], e2e .EtcdServerReadyLines [0 ])
329+
330+ t .Log ("Ensure all members are voting members again" )
331+ ensureAllMembersAreVotingMembers (t , etcdctl )
332+ })
333+ }
334+ }
335+
336+ func mustSaveMemberIntoBbolt (t * testing.T , dataDir string , protoMember * etcdserverpb.Member ) {
337+ dbPath := datadir .ToBackendFileName (dataDir )
338+ db , err := bbolt .Open (dbPath , 0666 , nil )
339+ require .NoError (t , err )
243340 defer func () {
244- derr := epc .Close ()
245- require .NoError (t , derr , "failed to close etcd cluster: %v" , derr )
341+ require .NoError (t , db .Close ())
246342 }()
247343
248- t .Log ("Add and start a learner" )
249- learnerID , err := epc .StartNewProc (nil , true , t )
250- require .NoError (t , err )
344+ m := & membership.Member {
345+ ID : types .ID (protoMember .ID ),
346+ RaftAttributes : membership.RaftAttributes {
347+ PeerURLs : protoMember .PeerURLs ,
348+ IsLearner : protoMember .IsLearner ,
349+ },
350+ Attributes : membership.Attributes {
351+ Name : protoMember .Name ,
352+ ClientURLs : protoMember .ClientURLs ,
353+ },
354+ }
355+
356+ err = db .Update (func (tx * bbolt.Tx ) error {
357+ b := tx .Bucket (buckets .Members .Name ())
358+
359+ mkey := []byte (m .ID .String ())
360+ mvalue , err := json .Marshal (m )
361+ require .NoError (t , err )
251362
252- t .Log ("Write a key to ensure the cluster is healthy so far" )
253- etcdctl := epc .Procs [0 ].Etcdctl (e2e .ClientNonTLS , false , false )
254- err = etcdctl .Put ("foo" , "bar" )
363+ return b .Put (mkey , mvalue )
364+ })
255365 require .NoError (t , err )
366+ }
256367
257- t . Logf ( "Promoting the learner %x" , learnerID )
258- _ , err = etcdctl .MemberPromote ( learnerID )
368+ func ensureAllMembersAreVotingMembers ( t * testing. T , etcdctl * e2e. Etcdctl ) {
369+ memberListResp , err : = etcdctl .MemberList ( )
259370 require .NoError (t , err )
371+ for _ , m := range memberListResp .Members {
372+ require .False (t , m .IsLearner )
373+ }
260374}
0 commit comments