Skip to content

Commit

Permalink
Add support for cliques
Browse files (browse the repository at this point in the history)
Signed-off-by: Ritika Srivastava <[email protected]>
  • Loading branch information
ritikasrivastava committed Dec 18, 2024
1 parent 10ac0cf commit 6ea45db
Showing 1 changed file with 14 additions and 5 deletions.
19 changes: 14 additions & 5 deletions pkg/providers/baremetal/mnnvl.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -198,17 +198,26 @@ func getClusterOutput(ctx context.Context, domainMap map[string]domain, nodes []
}

scanner := bufio.NewScanner(stdout)
cliqueId := ""
clusterUUID := ""
domainName := ""
for scanner.Scan() {
nodeLine := scanner.Text()
arr := strings.Split(nodeLine, ":")
nodeName := arr[0]
clusterUUID := strings.TrimSpace(arr[2])
if !domainIDExists(clusterUUID, domainMap) {
domainMap[clusterUUID] = domain{
itemName := strings.TrimSpace(arr[1])
if itemName == "CliqueId" {
cliqueId = strings.TrimSpace(arr[2])
continue
}
clusterUUID = strings.TrimSpace(arr[2])
domainName = clusterUUID + cliqueId
if !domainIDExists(domainName, domainMap) {
domainMap[domainName] = domain{
nodeMap: make(map[string]bool),
}
}
nodeMap := domainMap[clusterUUID].nodeMap
nodeMap := domainMap[domainName].nodeMap
nodeMap[nodeName] = true
}
if err := scanner.Err(); err != nil {
Expand Down Expand Up @@ -244,7 +253,7 @@ func toGraph(domainMap map[string]domain, treeRoot *topology.Vertex) *topology.V
func generateTopologyConfig(ctx context.Context, cis []topology.ComputeInstances) (*topology.Vertex, error) {
domainMap := make(map[string]domain) // domainID: domain
nodes := getNodeList(cis)
err := getClusterOutput(ctx, domainMap, nodes, "nvidia-smi -q | grep ClusterUUID")
err := getClusterOutput(ctx, domainMap, nodes, `nvidia-smi -q | grep "ClusterUUID\|CliqueId"`)
if err != nil {
return nil, fmt.Errorf("getClusterOutput failed: %v", err)
}
Expand Down

0 comments on commit 6ea45db

Please sign in to comment.