From ea532f15f8fcf8e5fd96042053018430f4dd2452 Mon Sep 17 00:00:00 2001
From: Tobias Grieger
Date: Thu, 13 Feb 2025 08:57:38 +0100
Subject: [PATCH] cli: fully and properly drain target node of decommission

With this patch, at the end of decommissioning, we call the drain step
as we would for `./cockroach node drain`:

```
[...]
.....
  id  is_live  replicas  is_decommissioning  membership       is_draining  readiness  blocking_ranges
   1  true            2  true                decommissioning  false        ready                    0
.....
  id  is_live  replicas  is_decommissioning  membership       is_draining  readiness  blocking_ranges
   1  true            1  true                decommissioning  false        ready                    0
......
  id  is_live  replicas  is_decommissioning  membership       is_draining  readiness  blocking_ranges
   1  true            0  true                decommissioning  false        ready                    0
draining node n2
node is draining... remaining: 26
node is draining... remaining: 0 (complete)
node n2 drained successfully

No more data reported on target nodes. Please verify cluster health before removing the nodes.
```

In particular, note how the first drain invocation reports a
RemainingIndicator of 26. This explains the failure in #140774: #138732
was insufficient, as it did not guarantee that the node had actually
drained fully by the time it was marked as fully decommissioned and
`node decommission` had returned. See
https://github.com/cockroachdb/cockroach/issues/140774.

I verified that the modified decommission/drains roachtest passes via

```
./pkg/cmd/roachtest/roachstress.sh -l -c 1 decommission/drains/alive
```

Touches https://github.com/cockroachdb/cockroach/issues/140774.
^-- backport to 25.1-rc would fix it.
Touches https://github.com/cockroachdb/cockroach/issues/139411.
^-- backport to 25.1 will fix it.

Fixes https://github.com/cockroachdb/cockroach/issues/139413.

Release note (ops change): the `node decommission` CLI command now waits
until the target node is drained before marking it as fully
decommissioned. Previously, it would start the drain but not wait for it
to finish, leaving the target node briefly in a state where it would be
unable to communicate with the cluster but would still accept client
requests (which would then hang or hit unexpected errors). Note that a
previous release note claimed to fix the same defect, but in fact only
reduced the likelihood of its occurrence. With this change, the problem
is fully addressed.

Epic: None
---
 pkg/cli/node.go                         | 41 ++++++++++---------
 pkg/cmd/roachtest/tests/decommission.go | 36 +++++++++++++++++++---
 2 files changed, 48 insertions(+), 29 deletions(-)

diff --git a/pkg/cli/node.go b/pkg/cli/node.go
index 7fbe272e441d..92911dfd3f39 100644
--- a/pkg/cli/node.go
+++ b/pkg/cli/node.go
@@ -8,7 +8,6 @@ package cli
 import (
 	"context"
 	"fmt"
-	"io"
 	"math"
 	"os"
 	"reflect"
@@ -610,29 +609,23 @@ func runDecommissionNodeImpl(
 			)
 			continue
 		}
-		drainReq := &serverpb.DrainRequest{
-			Shutdown: false,
-			DoDrain:  true,
-			NodeId:   targetNode.String(),
-		}
-		stream, err := c.Drain(ctx, drainReq)
-		if err != nil {
-			fmt.Fprintln(stderr)
-			return errors.Wrapf(err, "while trying to drain n%d", targetNode)
-		}
-
-		// Consume responses until the stream ends (which signals drain
-		// completion).
-		for {
-			_, err := stream.Recv()
-			if err == io.EOF {
-				// Stream gracefully closed by other side.
-				break
-			}
-			if err != nil {
-				fmt.Fprintln(stderr)
-				return errors.Wrapf(err, "while trying to drain n%d", targetNode)
-			}
+		_, _ = fmt.Fprintf(stderr, "draining node n%d\n", targetNode)
+
+		if _, _, err := doDrain(ctx, c, targetNode.String()); err != nil {
+			// NB: doDrain already prints to stderr.
+			//
+			// Defense in depth: in decommission invocations that don't have to
+			// do much work, if the target node was shut down _just_ prior to
+			// starting `node decommission`, the node may be absent but its
+			// liveness status may have sent us here anyway. To keep the
+			// decommission command robust, we don't fail out on the drain step.
+			_, _ = fmt.Fprintf(stderr,
+				"drain step for node n%d failed; decommissioning anyway\n", targetNode,
+			)
+			_ = err // discard intentionally
+		} else {
+			// NB: this output is matched on in the decommission/drains roachtest.
+			_, _ = fmt.Fprintf(stderr, "node n%d drained successfully\n", targetNode)
+		}
 	}

diff --git a/pkg/cmd/roachtest/tests/decommission.go b/pkg/cmd/roachtest/tests/decommission.go
index 5fc78b9d0330..da15f031375e 100644
--- a/pkg/cmd/roachtest/tests/decommission.go
+++ b/pkg/cmd/roachtest/tests/decommission.go
@@ -1056,7 +1056,7 @@ func runDecommissionDrains(ctx context.Context, t test.Test, c cluster.Cluster,
 	}
 	t.Status(fmt.Sprintf("decommissioning node %d", decommNodeID))
 	e := retry.WithMaxAttempts(ctx, retryOpts, maxAttempts, func() error {
-		o, err := h.decommission(ctx, decommNode, pinnedNodeID, "--wait=none", "--format=csv")
+		o, e, err := h.decommissionExt(ctx, decommNode, pinnedNodeID, "--wait=none", "--format=csv")
 		require.NoError(t, errors.Wrapf(err, "decommission failed"))

 		// Check if all the replicas have been transferred.
@@ -1072,6 +1072,15 @@ func runDecommissionDrains(ctx context.Context, t test.Test, c cluster.Cluster,
 			return nil
 		}

+		// Check that the CLI printed the message below. We do this because the
+		// drain step does not error out the decommission command on failure
+		// (to make that command more resilient to situations in which a node is
+		// terminated right around the time we try to drain it).
+		// This message is emitted in `cli.runDecommissionNodeImpl`, which
+		// carries a back-referencing comment noting that this roachtest matches
+		// on it.
+		require.Contains(t, e, "drained successfully")
+
 		// Check to see if the node has been drained or decommissioned.
 		// If not, queries should not fail.
 		// Connect to node 4 (the target node of the decommission).
@@ -1235,11 +1244,21 @@ func (h *decommTestHelper) getLogicalNodeID(ctx context.Context, nodeIdx int) (i
 	return nodeID, nil
 }

-// decommission decommissions the given targetNodes, running the process
-// through the specified runNode.
 func (h *decommTestHelper) decommission(
 	ctx context.Context, targetNodes option.NodeListOption, runNode int, verbs ...string,
 ) (string, error) {
+	o, _, err := h.decommissionExt(ctx, targetNodes, runNode, verbs...)
+	return o, err
+}
+
+// decommissionExt decommissions the given targetNodes, running the process
+// through the specified runNode.
+// It returns stdout, stderr, and error.
+// Stdout contains the tabular decommission progress, while stderr contains
+// informational updates.
+func (h *decommTestHelper) decommissionExt(
+	ctx context.Context, targetNodes option.NodeListOption, runNode int, verbs ...string,
+) (string, string, error) {
 	args := []string{"node", "decommission"}
 	args = append(args, verbs...)

@@ -1250,7 +1269,7 @@ func (h *decommTestHelper) decommission(
 			args = append(args, strconv.Itoa(target))
 		}
 	}
-	return execCLI(ctx, h.t, h.c, runNode, args...)
+	return execCLIExt(ctx, h.t, h.c, runNode, args...)
 }

 // recommission recommissions the given targetNodes, running the process
 // through the specified runNode.
@@ -1479,13 +1498,20 @@ func (h *decommTestHelper) getRandNodeOtherThan(ids ...int) int {
 func execCLI(
 	ctx context.Context, t test.Test, c cluster.Cluster, runNode int, extraArgs ...string,
 ) (string, error) {
+	out, _, err := execCLIExt(ctx, t, c, runNode, extraArgs...)
+	return out, err
+}
+
+func execCLIExt(
+	ctx context.Context, t test.Test, c cluster.Cluster, runNode int, extraArgs ...string,
+) (string, string, error) {
 	args := []string{"./cockroach"}
 	args = append(args, extraArgs...)
 	args = append(args, fmt.Sprintf("--port={pgport:%d}", runNode))
 	args = append(args, fmt.Sprintf("--certs-dir=%s", install.CockroachNodeCertsDir))
 	result, err := c.RunWithDetailsSingleNode(ctx, t.L(), option.WithNodes(c.Node(runNode)), args...)
 	t.L().Printf("%s\n", result.Stdout)
-	return result.Stdout, err
+	return result.Stdout, result.Stderr, err
 }

 // Increase the logging verbosity for decommission tests to make life easier
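
For readers who want the shape of the "wait until drained" behavior without reading `doDrain`, here is a minimal, hypothetical Go sketch. It is not the CockroachDB implementation: `drainClient`, `DrainStatus`, and `waitForDrain` are illustrative stand-ins, and the real code drives the drain RPC through `doDrain` instead. It only illustrates the guarantee the patch wants: success is reported only once the node says no work remains.

```go
// Package drainsketch is a hypothetical illustration, not CockroachDB code.
package drainsketch

import (
	"context"
	"fmt"
	"time"
)

// DrainStatus is a stand-in for the progress a drain request might report,
// cf. the "remaining: 26" / "remaining: 0 (complete)" output above.
type DrainStatus struct {
	Remaining uint64 // outstanding work items (leases, SQL conns, ...)
	Done      bool   // the server considers the drain complete
}

// drainClient abstracts "ask the node to drain and report how much is left".
type drainClient interface {
	Drain(ctx context.Context, nodeID string) (DrainStatus, error)
}

// waitForDrain keeps re-issuing drain requests until the node reports zero
// remaining work, so "drained successfully" is only printed once the drain
// has truly finished (rather than merely been started).
func waitForDrain(ctx context.Context, c drainClient, nodeID string) error {
	for {
		status, err := c.Drain(ctx, nodeID)
		if err != nil {
			return err
		}
		if status.Done && status.Remaining == 0 {
			fmt.Printf("node %s drained successfully\n", nodeID)
			return nil
		}
		fmt.Printf("node %s is draining... remaining: %d\n", nodeID, status.Remaining)
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-time.After(time.Second):
			// Back off briefly before asking again.
		}
	}
}
```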
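The roachtest change, in turn, hinges on the helper returning stdout and stderr separately: the tabular status goes to stdout, while informational messages (including the drain confirmation) go to stderr. The following is a hypothetical, self-contained version of that check; the real test uses `decommissionExt` and testify's `require.Contains`, whereas this sketch uses only the standard library and a stubbed `runDecommission`.

```go
package drainsketch

import (
	"strings"
	"testing"
)

// runDecommission is a stand-in for decommissionExt: stdout carries the
// tabular decommission status, stderr carries informational updates.
func runDecommission() (stdout, stderr string) {
	return "id,is_live,replicas\n2,true,0\n",
		"draining node n2\nnode n2 drained successfully\n"
}

// TestDrainMessageOnStderr mirrors the assertion added to the roachtest:
// the drain confirmation must appear on stderr.
func TestDrainMessageOnStderr(t *testing.T) {
	_, stderr := runDecommission()
	if !strings.Contains(stderr, "drained successfully") {
		t.Fatalf("expected drain confirmation on stderr, got %q", stderr)
	}
}
```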