@@ -24,7 +24,12 @@ import (
2424
2525 "github.com/stretchr/testify/require"
2626
27+ "go.etcd.io/bbolt"
2728 "go.etcd.io/etcd/api/v3/etcdserverpb"
29+ "go.etcd.io/etcd/client/pkg/v3/types"
30+ "go.etcd.io/etcd/server/v3/datadir"
31+ "go.etcd.io/etcd/server/v3/etcdserver/api/membership"
32+ "go.etcd.io/etcd/server/v3/mvcc/buckets"
2833 "go.etcd.io/etcd/tests/v3/framework/e2e"
2934)
3035
@@ -230,31 +235,185 @@ func ctlV3MemberUpdate(cx ctlCtx, memberID, peerURL string) error {
230235 return e2e .SpawnWithExpectWithEnv (cmdArgs , cx .envMap , " updated in cluster " )
231236}
232237
238+ // TestCtlV3PromotingLearner tests whether etcd can automatically fix the
239+ // issue caused by https://github.com/etcd-io/etcd/issues/19557.
233240func TestCtlV3PromotingLearner (t * testing.T ) {
234- e2e .BeforeTest (t )
241+ testCases := []struct {
242+ name string
243+ snapshotCount int
244+ writeToV3StoreSuccess bool
245+ }{
246+ {
247+ name : "create snapshot after learner promotion which is not saved to v3store" ,
248+ snapshotCount : 10 ,
249+ },
250+ {
251+ name : "not create snapshot and learner promotion is not saved to v3store" ,
252+ snapshotCount : 0 ,
253+ },
254+ {
255+ name : "not create snapshot and learner promotion is saved to v3store" ,
256+ snapshotCount : 0 ,
257+ writeToV3StoreSuccess : true ,
258+ },
259+ }
260+
261+ for _ , tc := range testCases {
262+ t .Run (tc .name , func (t * testing.T ) {
263+ t .Log ("Create a single node etcd cluster" )
264+ cfg := e2e .NewConfigNoTLS ()
265+ cfg .BasePeerScheme = "unix"
266+ cfg .ClusterSize = 1
267+ cfg .InitialCorruptCheck = true
268+ if tc .snapshotCount != 0 {
269+ cfg .SnapshotCount = tc .snapshotCount
270+ }
271+
272+ epc , err := e2e .NewEtcdProcessCluster (t , cfg )
273+ require .NoError (t , err , "failed to start etcd cluster: %v" , err )
274+ defer func () {
275+ derr := epc .Close ()
276+ require .NoError (t , derr , "failed to close etcd cluster: %v" , derr )
277+ }()
278+
279+ t .Log ("Add and start a learner" )
280+ learnerID , err := epc .StartNewProc (nil , true , t )
281+ require .NoError (t , err )
282+
283+ t .Log ("Write a key to ensure the cluster is healthy so far" )
284+ etcdctl := epc .Procs [0 ].Etcdctl (e2e .ClientNonTLS , false , false )
285+ err = etcdctl .Put ("foo" , "bar" )
286+ require .NoError (t , err )
287+
288+ t .Logf ("Promoting the learner %x" , learnerID )
289+ resp , err := etcdctl .MemberPromote (learnerID )
290+ require .NoError (t , err )
291+
292+ var promotedMember * etcdserverpb.Member
293+ for _ , m := range resp .Members {
294+ if m .ID == learnerID {
295+ promotedMember = m
296+ break
297+ }
298+ }
299+ require .NotNil (t , promotedMember )
300+ t .Logf ("The promoted member: %+v" , promotedMember )
301+
302+ t .Log ("Ensure all members are voting members from user perspective" )
303+ ensureAllMembersAreVotingMembers (t , etcdctl )
235304
236- t .Log ("Create a single node etcd cluster" )
237- cfg := e2e .NewConfigNoTLS ()
238- cfg .BasePeerScheme = "unix"
239- cfg .ClusterSize = 1
305+ if tc .snapshotCount != 0 {
306+ t .Logf ("Write %d keys to trigger a snapshot" , tc .snapshotCount )
307+ for i := 0 ; i < tc .snapshotCount ; i ++ {
308+ err = etcdctl .Put (fmt .Sprintf ("key_%d" , i ), fmt .Sprintf ("value_%d" , i ))
309+ require .NoError (t , err )
310+ }
311+ }
240312
241- epc , err := e2e .NewEtcdProcessCluster (t , cfg )
242- require .NoError (t , err , "failed to start etcd cluster: %v" , err )
313+ if tc .writeToV3StoreSuccess {
314+ t .Log ("Skip manually changing the already promoted learner to a learner in v3store" )
315+ } else {
316+ t .Logf ("Stopping the already promoted member" )
317+ require .NoError (t , epc .Procs [1 ].Stop ())
318+
319+ t .Log ("Manually changing the already promoted member to a learner again in v3store" )
320+ promotedMember .IsLearner = true
321+ mustSaveMemberIntoBbolt (t , epc .Procs [1 ].Config ().DataDirPath , promotedMember )
322+
323+ t .Log ("Starting the member again" )
324+ require .NoError (t , epc .Procs [1 ].Start ())
325+ }
326+
327+ t .Log ("Checking all members are ready to serve client requests" )
328+ for i := 0 ; i < len (epc .Procs ); i ++ {
329+ e2e .AssertProcessLogs (t , epc .Procs [i ], e2e .EtcdServerReadyLines [0 ])
330+ }
331+
332+ // Wait for the learner published attribute to be applied by all members in the cluster
333+ t .Log ("Write a key to ensure the the learner published attribute has been applied by all members" )
334+ for i := 0 ; i < len (epc .Procs ); i ++ {
335+ cli := epc .Procs [i ].Etcdctl (e2e .ClientNonTLS , false , false )
336+ err = cli .Put ("foo" , "bar" )
337+ require .NoError (t , err )
338+ }
339+
340+ t .Log ("Ensure all members in v3store are voting members again" )
341+ for i := 0 ; i < len (epc .Procs ); i ++ {
342+ t .Logf ("Stopping the member: %d" , i )
343+ require .NoError (t , epc .Procs [i ].Stop ())
344+
345+ t .Logf ("Checking all members in member's backend store: %d" , i )
346+ ensureAllMembersFromV3StoreAreVotingMembers (t , epc .Procs [i ].Config ().DataDirPath )
347+
348+ t .Logf ("Starting the member again: %d" , i )
349+ require .NoError (t , epc .Procs [i ].Start ())
350+ }
351+ })
352+ }
353+ }
354+
355+ func mustSaveMemberIntoBbolt (t * testing.T , dataDir string , protoMember * etcdserverpb.Member ) {
356+ dbPath := datadir .ToBackendFileName (dataDir )
357+ db , err := bbolt .Open (dbPath , 0666 , nil )
358+ require .NoError (t , err )
243359 defer func () {
244- derr := epc .Close ()
245- require .NoError (t , derr , "failed to close etcd cluster: %v" , derr )
360+ require .NoError (t , db .Close ())
246361 }()
247362
248- t .Log ("Add and start a learner" )
249- learnerID , err := epc .StartNewProc (nil , true , t )
363+ m := & membership.Member {
364+ ID : types .ID (protoMember .ID ),
365+ RaftAttributes : membership.RaftAttributes {
366+ PeerURLs : protoMember .PeerURLs ,
367+ IsLearner : protoMember .IsLearner ,
368+ },
369+ Attributes : membership.Attributes {
370+ Name : protoMember .Name ,
371+ ClientURLs : protoMember .ClientURLs ,
372+ },
373+ }
374+
375+ err = db .Update (func (tx * bbolt.Tx ) error {
376+ b := tx .Bucket (buckets .Members .Name ())
377+
378+ mkey := []byte (m .ID .String ())
379+ mvalue , err := json .Marshal (m )
380+ require .NoError (t , err )
381+
382+ return b .Put (mkey , mvalue )
383+ })
250384 require .NoError (t , err )
385+ }
251386
252- t .Log ("Write a key to ensure the cluster is healthy so far" )
253- etcdctl := epc .Procs [0 ].Etcdctl (e2e .ClientNonTLS , false , false )
254- err = etcdctl .Put ("foo" , "bar" )
387+ func ensureAllMembersAreVotingMembers (t * testing.T , etcdctl * e2e.Etcdctl ) {
388+ memberListResp , err := etcdctl .MemberList ()
255389 require .NoError (t , err )
390+ for _ , m := range memberListResp .Members {
391+ require .False (t , m .IsLearner )
392+ }
393+ }
256394
257- t .Logf ("Promoting the learner %x" , learnerID )
258- _ , err = etcdctl .MemberPromote (learnerID )
395+ func ensureAllMembersFromV3StoreAreVotingMembers (t * testing.T , dataDir string ) {
396+ dbPath := datadir .ToBackendFileName (dataDir )
397+ db , err := bbolt .Open (dbPath , 0400 , & bbolt.Options {ReadOnly : true })
259398 require .NoError (t , err )
399+ defer func () {
400+ require .NoError (t , db .Close ())
401+ }()
402+
403+ var members []membership.Member
404+ _ = db .View (func (tx * bbolt.Tx ) error {
405+ b := tx .Bucket (buckets .Members .Name ())
406+ _ = b .ForEach (func (k , v []byte ) error {
407+ m := membership.Member {}
408+ err := json .Unmarshal (v , & m )
409+ require .NoError (t , err )
410+ members = append (members , m )
411+ return nil
412+ })
413+ return nil
414+ })
415+
416+ for _ , m := range members {
417+ require .Falsef (t , m .IsLearner , "member is still learner: %+v" , m )
418+ }
260419}
0 commit comments