@@ -24,7 +24,12 @@ import (
2424
2525 "github.com/stretchr/testify/require"
2626
27+ "go.etcd.io/bbolt"
2728 "go.etcd.io/etcd/api/v3/etcdserverpb"
29+ "go.etcd.io/etcd/client/pkg/v3/types"
30+ "go.etcd.io/etcd/server/v3/datadir"
31+ "go.etcd.io/etcd/server/v3/etcdserver/api/membership"
32+ "go.etcd.io/etcd/server/v3/mvcc/buckets"
2833 "go.etcd.io/etcd/tests/v3/framework/e2e"
2934)
3035
@@ -230,31 +235,181 @@ func ctlV3MemberUpdate(cx ctlCtx, memberID, peerURL string) error {
230235 return e2e .SpawnWithExpectWithEnv (cmdArgs , cx .envMap , " updated in cluster " )
231236}
232237
238+ // TestCtlV3PromotingLearner tests whether etcd can automatically fix the
239+ // issue caused by https://github.com/etcd-io/etcd/issues/19557.
233240func TestCtlV3PromotingLearner (t * testing.T ) {
234- e2e .BeforeTest (t )
241+ testCases := []struct {
242+ name string
243+ snapshotCount int
244+ promotionSuccess bool
245+ }{
246+ {
247+ name : "create snapshot after learner promotion which is not saved to v3store" ,
248+ snapshotCount : 10 ,
249+ },
250+ {
251+ name : "not create snapshot and learner promotion is not saved to v3store" ,
252+ snapshotCount : 0 ,
253+ },
254+ {
255+ name : "not create snapshot and learner promotion is saved to v3store" ,
256+ snapshotCount : 0 ,
257+ promotionSuccess : true ,
258+ },
259+ }
260+
261+ for _ , tc := range testCases {
262+ t .Run (tc .name , func (t * testing.T ) {
263+ t .Log ("Create a single node etcd cluster" )
264+ cfg := e2e .NewConfigNoTLS ()
265+ cfg .BasePeerScheme = "unix"
266+ cfg .ClusterSize = 1
267+ if tc .snapshotCount != 0 {
268+ cfg .SnapshotCount = tc .snapshotCount
269+ }
270+
271+ epc , err := e2e .NewEtcdProcessCluster (t , cfg )
272+ require .NoError (t , err , "failed to start etcd cluster: %v" , err )
273+ defer func () {
274+ derr := epc .Close ()
275+ require .NoError (t , derr , "failed to close etcd cluster: %v" , derr )
276+ }()
277+
278+ t .Log ("Add and start a learner" )
279+ learnerID , err := epc .StartNewProc (nil , true , t )
280+ require .NoError (t , err )
281+
282+ t .Log ("Write a key to ensure the cluster is healthy so far" )
283+ etcdctl := epc .Procs [0 ].Etcdctl (e2e .ClientNonTLS , false , false )
284+ err = etcdctl .Put ("foo" , "bar" )
285+ require .NoError (t , err )
286+
287+ t .Logf ("Promoting the learner %x" , learnerID )
288+ resp , err := etcdctl .MemberPromote (learnerID )
289+ require .NoError (t , err )
290+
291+ var promotedMember * etcdserverpb.Member
292+ for _ , m := range resp .Members {
293+ if m .ID == learnerID {
294+ promotedMember = m
295+ break
296+ }
297+ }
298+ require .NotNil (t , promotedMember )
299+ t .Logf ("The promoted member: %+v" , promotedMember )
300+
301+ t .Log ("Ensure all members are voting members" )
302+ ensureAllMembersAreVotingMembers (t , etcdctl )
235303
236- t .Log ("Create a single node etcd cluster" )
237- cfg := e2e .NewConfigNoTLS ()
238- cfg .BasePeerScheme = "unix"
239- cfg .ClusterSize = 1
304+ if tc .snapshotCount != 0 {
305+ t .Logf ("Write %d keys to trigger a snapshot" , tc .snapshotCount )
306+ for i := 0 ; i < tc .snapshotCount ; i ++ {
307+ err = etcdctl .Put (fmt .Sprintf ("key_%d" , i ), fmt .Sprintf ("value_%d" , i ))
308+ require .NoError (t , err )
309+ }
310+ }
240311
241- epc , err := e2e .NewEtcdProcessCluster (t , cfg )
242- require .NoError (t , err , "failed to start etcd cluster: %v" , err )
312+ if tc .promotionSuccess {
313+ t .Log ("Skip manually changing the already promoted learner to a learner" )
314+ } else {
315+ t .Logf ("Stopping the already promoted member" )
316+ require .NoError (t , epc .Procs [1 ].Stop ())
317+
318+ t .Log ("Manually changing the already promoted member to a learner again" )
319+ promotedMember .IsLearner = true
320+ mustSaveMemberIntoBbolt (t , epc .Procs [1 ].Config ().DataDirPath , promotedMember )
321+
322+ t .Log ("Starting the member again" )
323+ require .NoError (t , epc .Procs [1 ].Start ())
324+ }
325+
326+ t .Log ("Checking all members are ready to serve client requests" )
327+ for i := 0 ; i < len (epc .Procs ); i ++ {
328+ e2e .AssertProcessLogs (t , epc .Procs [i ], e2e .EtcdServerReadyLines [0 ])
329+ }
330+
331+ // Wait for the learner published attribute to be applied by all members in the cluster
332+ t .Log ("Write a key to ensure the the learner published attribute has been applied by all members" )
333+ err = etcdctl .Put ("foo" , "bar" )
334+ require .NoError (t , err )
335+
336+ t .Log ("Ensure all members are voting members again" )
337+ for i := 0 ; i < len (epc .Procs ); i ++ {
338+ t .Logf ("Stopping the member: %d" , i )
339+ require .NoError (t , epc .Procs [i ].Stop ())
340+
341+ t .Logf ("Checking all members in member's backend store: %d" , i )
342+ ensureAllMembersFromV3StoreAreVotingMembers (t , epc .Procs [i ].Config ().DataDirPath )
343+
344+ t .Logf ("Starting the member again: %d" , i )
345+ require .NoError (t , epc .Procs [i ].Start ())
346+ }
347+ })
348+ }
349+ }
350+
351+ func mustSaveMemberIntoBbolt (t * testing.T , dataDir string , protoMember * etcdserverpb.Member ) {
352+ dbPath := datadir .ToBackendFileName (dataDir )
353+ db , err := bbolt .Open (dbPath , 0666 , nil )
354+ require .NoError (t , err )
243355 defer func () {
244- derr := epc .Close ()
245- require .NoError (t , derr , "failed to close etcd cluster: %v" , derr )
356+ require .NoError (t , db .Close ())
246357 }()
247358
248- t .Log ("Add and start a learner" )
249- learnerID , err := epc .StartNewProc (nil , true , t )
359+ m := & membership.Member {
360+ ID : types .ID (protoMember .ID ),
361+ RaftAttributes : membership.RaftAttributes {
362+ PeerURLs : protoMember .PeerURLs ,
363+ IsLearner : protoMember .IsLearner ,
364+ },
365+ Attributes : membership.Attributes {
366+ Name : protoMember .Name ,
367+ ClientURLs : protoMember .ClientURLs ,
368+ },
369+ }
370+
371+ err = db .Update (func (tx * bbolt.Tx ) error {
372+ b := tx .Bucket (buckets .Members .Name ())
373+
374+ mkey := []byte (m .ID .String ())
375+ mvalue , err := json .Marshal (m )
376+ require .NoError (t , err )
377+
378+ return b .Put (mkey , mvalue )
379+ })
250380 require .NoError (t , err )
381+ }
251382
252- t .Log ("Write a key to ensure the cluster is healthy so far" )
253- etcdctl := epc .Procs [0 ].Etcdctl (e2e .ClientNonTLS , false , false )
254- err = etcdctl .Put ("foo" , "bar" )
383+ func ensureAllMembersAreVotingMembers (t * testing.T , etcdctl * e2e.Etcdctl ) {
384+ memberListResp , err := etcdctl .MemberList ()
255385 require .NoError (t , err )
386+ for _ , m := range memberListResp .Members {
387+ require .False (t , m .IsLearner )
388+ }
389+ }
256390
257- t .Logf ("Promoting the learner %x" , learnerID )
258- _ , err = etcdctl .MemberPromote (learnerID )
391+ func ensureAllMembersFromV3StoreAreVotingMembers (t * testing.T , dataDir string ) {
392+ dbPath := datadir .ToBackendFileName (dataDir )
393+ db , err := bbolt .Open (dbPath , 0400 , & bbolt.Options {ReadOnly : true })
259394 require .NoError (t , err )
395+ defer func () {
396+ require .NoError (t , db .Close ())
397+ }()
398+
399+ var members []membership.Member
400+ _ = db .View (func (tx * bbolt.Tx ) error {
401+ b := tx .Bucket (buckets .Members .Name ())
402+ _ = b .ForEach (func (k , v []byte ) error {
403+ m := membership.Member {}
404+ err := json .Unmarshal (v , & m )
405+ require .NoError (t , err )
406+ members = append (members , m )
407+ return nil
408+ })
409+ return nil
410+ })
411+
412+ for _ , m := range members {
413+ require .Falsef (t , m .IsLearner , "member is still learner: %+v" , m )
414+ }
260415}
0 commit comments