Skip to content

Commit caf19d2

Browse files
author
releng
committed
Sync from server repo (cbf78ae4e5a)
1 parent cf50448 commit caf19d2

8 files changed

+56
-21
lines changed

vclusterops/cluster_op_engine_context.go

+1
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ type opEngineExecContext struct {
2222
networkProfiles map[string]networkProfile
2323
nmaVDatabase nmaVDatabase
2424
upHosts []string // a sorted host list that contains all up nodes
25+
computeHosts []string // a host list that contains all compute nodes (nodes whose state is COMPUTE)
2526
nodesInfo []NodeInfo
2627
scNodesInfo []NodeInfo // a node list that contains all nodes in a subcluster
2728

vclusterops/coordinator_database.go

+9-1
Original file line numberDiff line numberDiff line change
@@ -162,8 +162,16 @@ func (vdb *VCoordinationDatabase) addNode(vnode *VCoordinationNode) error {
162162
// in all clusters (main and sandboxes)
163163
func (vdb *VCoordinationDatabase) addHosts(hosts []string, scName string,
164164
existingHostNodeMap vHostNodeMap) error {
165-
totalHostCount := len(hosts) + len(existingHostNodeMap)
165+
totalHostCount := len(hosts) + len(existingHostNodeMap) + len(vdb.UnboundNodes)
166166
nodeNameToHost := genNodeNameToHostMap(existingHostNodeMap)
167+
// The GenVNodeName(...) function below will generate node names based on nodeNameToHost and totalHostCount.
168+
// If a name already exists, it won't be re-generated.
169+
// In this case, we need to add unbound node names into this map too.
170+
// Otherwise, the new nodes would reuse the existing unbound node names and cause a name clash later on.
171+
for _, vnode := range vdb.UnboundNodes {
172+
nodeNameToHost[vnode.Name] = vnode.Address
173+
}
174+
167175
for _, host := range hosts {
168176
vNode := makeVCoordinationNode()
169177
name, ok := util.GenVNodeName(nodeNameToHost, vdb.Name, totalHostCount)

vclusterops/fetch_database.go

+3
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,9 @@ func (vcc VClusterCommands) VFetchCoordinationDatabase(options *VFetchCoordinati
122122
}
123123

124124
for h, n := range nmaVDB.HostNodeMap {
125+
if h == util.UnboundedIPv4 || h == util.UnboundedIPv6 {
126+
continue
127+
}
125128
vnode, ok := vdb.HostNodeMap[h]
126129
if !ok {
127130
return vdb, fmt.Errorf("host %s is not found in the vdb object", h)

vclusterops/https_check_subcluster_sandbox_op.go

+6
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ package vclusterops
1818
import (
1919
"errors"
2020
"fmt"
21+
22+
"github.com/vertica/vcluster/vclusterops/util"
2123
)
2224

2325
type httpsCheckSubclusterSandboxOp struct {
@@ -60,6 +62,10 @@ func (op *httpsCheckSubclusterSandboxOp) setupClusterHTTPRequest(hosts []string)
6062
}
6163

6264
func (op *httpsCheckSubclusterSandboxOp) prepare(execContext *opEngineExecContext) error {
65+
if execContext.computeHosts != nil {
66+
op.hosts = util.SliceDiff(op.hosts, execContext.computeHosts)
67+
}
68+
6369
execContext.dispatcher.setup(op.hosts)
6470

6571
return op.setupClusterHTTPRequest(op.hosts)

vclusterops/https_get_up_nodes_op.go

+20-10
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ func (op *httpsGetUpNodesOp) execute(execContext *opEngineExecContext) error {
138138
func (op *httpsGetUpNodesOp) processResult(execContext *opEngineExecContext) error {
139139
var allErrs error
140140
upHosts := mapset.NewSet[string]()
141+
computeHosts := mapset.NewSet[string]()
141142
upScInfo := make(map[string]string)
142143
exceptionHosts := []string{}
143144
downHosts := []string{}
@@ -148,8 +149,9 @@ func (op *httpsGetUpNodesOp) processResult(execContext *opEngineExecContext) err
148149
op.logResponse(host, result)
149150
if !result.isPassing() {
150151
allErrs = errors.Join(allErrs, result.err)
151-
if result.isUnauthorizedRequest() || result.isInternalError() {
152-
// Authentication error and any unexpected internal server error
152+
if result.isUnauthorizedRequest() || result.isInternalError() || result.hasPreconditionFailed() {
153+
// Authentication error and any unexpected internal server error, plus compute nodes or nodes
154+
// that haven't joined the cluster yet
153155
exceptionHosts = append(exceptionHosts, host)
154156
continue
155157
}
@@ -167,16 +169,15 @@ func (op *httpsGetUpNodesOp) processResult(execContext *opEngineExecContext) err
167169
continue
168170
}
169171

170-
if op.cmdType == StopDBCmd || op.cmdType == StopSubclusterCmd {
171-
err = op.validateHosts(nodesStates)
172-
if err != nil {
173-
allErrs = errors.Join(allErrs, err)
174-
break
175-
}
172+
// For certain commands, check hosts in input against those reported from endpoint
173+
err = op.validateHosts(nodesStates)
174+
if err != nil {
175+
allErrs = errors.Join(allErrs, err)
176+
break
176177
}
177178

178179
// Collect all the up hosts
179-
err = op.collectUpHosts(nodesStates, host, upHosts, upScInfo, sandboxInfo, upScNodes, scNodes)
180+
err = op.collectUpHosts(nodesStates, host, upHosts, computeHosts, upScInfo, sandboxInfo, upScNodes, scNodes)
180181
if err != nil {
181182
allErrs = errors.Join(allErrs, err)
182183
return allErrs
@@ -190,6 +191,7 @@ func (op *httpsGetUpNodesOp) processResult(execContext *opEngineExecContext) err
190191
break
191192
}
192193
}
194+
execContext.computeHosts = computeHosts.ToSlice()
193195
execContext.nodesInfo = upScNodes.ToSlice()
194196
execContext.scNodesInfo = scNodes.ToSlice()
195197
execContext.upHostsToSandboxes = sandboxInfo
@@ -275,6 +277,10 @@ func (op *httpsGetUpNodesOp) processHostLists(upHosts mapset.Set[string], upScIn
275277

276278
// validateHosts validates whether the hosts in user input match the ones in the GET /nodes response
277279
func (op *httpsGetUpNodesOp) validateHosts(nodesStates nodesStateInfo) error {
280+
// only needed for the following commands
281+
if !(op.cmdType == StopDBCmd || op.cmdType == StopSubclusterCmd) {
282+
return nil
283+
}
278284
var dbHosts []string
279285
dbUnexpected := false
280286
unexpectedDBName := ""
@@ -310,7 +316,7 @@ func (op *httpsGetUpNodesOp) checkUpHostEligible(node *nodeStateInfo) bool {
310316
return true
311317
}
312318

313-
func (op *httpsGetUpNodesOp) collectUpHosts(nodesStates nodesStateInfo, host string, upHosts mapset.Set[string],
319+
func (op *httpsGetUpNodesOp) collectUpHosts(nodesStates nodesStateInfo, host string, upHosts, computeHosts mapset.Set[string],
314320
upScInfo, sandboxInfo map[string]string, upScNodes, scNodes mapset.Set[NodeInfo]) (err error) {
315321
foundSC := false
316322
for _, node := range nodesStates.NodeList {
@@ -333,6 +339,10 @@ func (op *httpsGetUpNodesOp) collectUpHosts(nodesStates nodesStateInfo, host str
333339
}
334340
}
335341

342+
if node.State == util.NodeComputeState {
343+
computeHosts.Add(node.Address)
344+
}
345+
336346
if op.scName == node.Subcluster {
337347
op.sandbox = node.Sandbox
338348
if node.IsPrimary {

vclusterops/nma_download_file_op.go

+4-2
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,7 @@ type fileContent struct {
208208
Path string `json:"path"`
209209
Usage int `json:"usage"`
210210
} `json:"StorageLocation"`
211+
Sandbox string
211212
}
212213

213214
func (op *nmaDownloadFileOp) processResult(execContext *opEngineExecContext) error {
@@ -278,7 +279,7 @@ func (op *nmaDownloadFileOp) processResult(execContext *opEngineExecContext) err
278279
}
279280

280281
// save descFileContent in vdb
281-
return op.buildVDBFromClusterConfig(descFileContent)
282+
return op.buildVDBFromClusterConfig(&descFileContent)
282283
}
283284

284285
httpsErr := errors.Join(fmt.Errorf("[%s] HTTPS call failed on host %s", op.name, host), result.err)
@@ -299,13 +300,14 @@ func filterPrimaryNodes(descFileContent *fileContent) {
299300
}
300301

301302
// buildVDBFromClusterConfig can build a vdb using cluster_config.json
302-
func (op *nmaDownloadFileOp) buildVDBFromClusterConfig(descFileContent fileContent) error {
303+
func (op *nmaDownloadFileOp) buildVDBFromClusterConfig(descFileContent *fileContent) error {
303304
op.vdb.HostNodeMap = makeVHostNodeMap()
304305
for _, node := range descFileContent.NodeList {
305306
vNode := makeVCoordinationNode()
306307
vNode.Name = node.Name
307308
vNode.Address = node.Address
308309
vNode.IsPrimary = node.IsPrimary
310+
vNode.Sandbox = descFileContent.Sandbox
309311

310312
// remove suffix "/Catalog" from node catalog path
311313
// e.g. /data/test_db/v_test_db_node0002_catalog/Catalog -> /data/test_db/v_test_db_node0002_catalog

vclusterops/start_db.go

+4-6
Original file line numberDiff line numberDiff line change
@@ -133,14 +133,12 @@ func (vcc VClusterCommands) VStartDatabase(options *VStartDatabaseOptions) (vdbP
133133
// VER-93369 may improve this if the CLI knows which nodes are primary
134134
// from the config file
135135
var vdb VCoordinationDatabase
136-
// retrieve database information from cluster_config.json for Eon databases,
137-
// skip this step for starting a sandbox because cluster_config.json does not
138-
// contain accurate info of nodes in a sandbox
139-
if !options.HostsInSandbox && options.IsEon {
136+
// retrieve database information from cluster_config.json for Eon databases
137+
if options.IsEon {
140138
const warningMsg = " for an Eon database, start_db after revive_db could fail " +
141139
util.DBInfo
142140
if options.CommunalStorageLocation != "" {
143-
vdbNew, e := options.getVDBWhenDBIsDown(vcc)
141+
vdbNew, e := options.getVDBFromSandboxWhenDBIsDown(vcc, options.Sandbox)
144142
if e != nil {
145143
// show a warning message if we cannot get VDB from a down database
146144
vcc.Log.PrintWarning(util.CommStorageFail + warningMsg)
@@ -173,7 +171,7 @@ func (vcc VClusterCommands) VStartDatabase(options *VStartDatabaseOptions) (vdbP
173171
clusterOpEngine := makeClusterOpEngine(instructions, options)
174172

175173
// Give the instructions to the VClusterOpEngine to run
176-
runError := clusterOpEngine.run(vcc.Log)
174+
runError := clusterOpEngine.runInSandbox(vcc.Log, &vdb, options.Sandbox)
177175
if runError != nil {
178176
return nil, fmt.Errorf("fail to start database: %w", runError)
179177
}

vclusterops/vcluster_database_options.go

+9-2
Original file line numberDiff line numberDiff line change
@@ -288,8 +288,15 @@ func (opt *DatabaseOptions) normalizePaths() {
288288
opt.DepotPrefix = util.GetCleanPath(opt.DepotPrefix)
289289
}
290290

291-
// getVDBWhenDBIsDown can retrieve db configurations from NMA /nodes endpoint and cluster_config.json when db is down
291+
// getVDBWhenDBIsDown can retrieve db configurations from the NMA /nodes endpoint and cluster_config.json when db is down
292292
func (opt *DatabaseOptions) getVDBWhenDBIsDown(vcc VClusterCommands) (vdb VCoordinationDatabase, err error) {
293+
return opt.getVDBFromSandboxWhenDBIsDown(vcc, util.MainClusterSandbox)
294+
}
295+
296+
// getVDBFromSandboxWhenDBIsDown can retrieve db configurations for a given sandbox
297+
// from the NMA /nodes endpoint and cluster_config.json when db is down
298+
func (opt *DatabaseOptions) getVDBFromSandboxWhenDBIsDown(vcc VClusterCommands,
299+
sandbox string) (vdb VCoordinationDatabase, err error) {
293300
/*
294301
* 1. Get node names for input hosts from NMA /nodes.
295302
* 2. Get other node information for input hosts from cluster_config.json.
@@ -324,7 +331,7 @@ func (opt *DatabaseOptions) getVDBWhenDBIsDown(vcc VClusterCommands) (vdb VCoord
324331
// step 2: get node details from cluster_config.json
325332
vdb2 := VCoordinationDatabase{}
326333
var instructions2 []clusterOp
327-
currConfigFileSrcPath := opt.getCurrConfigFilePath(util.MainClusterSandbox)
334+
currConfigFileSrcPath := opt.getCurrConfigFilePath(sandbox)
328335
nmaDownLoadFileOp, err := makeNMADownloadFileOp(opt.Hosts, currConfigFileSrcPath, currConfigFileDestPath, catalogPath,
329336
opt.ConfigurationParameters, &vdb2)
330337
if err != nil {

0 commit comments

Comments
 (0)