Skip to content

Commit

Permalink
[PLAT-14883][Azure] OS upgrades seems to be leaving over unattached d…
Browse files Browse the repository at this point in the history
…isks

Summary:
Used the same approach of deleting disks after OS upgrade for azure.
Removed duplicate code in VMImageUpgrade.

Test Plan:
1) run OS upgrade with the same image and force=true and force it to fail while deleting disk
2) retry that task (without force=true) - verify that old disk is deleted

Reviewers: svarshney, muthu, nsingh

Reviewed By: muthu

Subscribers: yugaware

Differential Revision: https://phorge.dev.yugabyte.com/D37680
  • Loading branch information
yorq committed Sep 16, 2024
1 parent d6e81df commit 2ba8180
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 46 deletions.
4 changes: 4 additions & 0 deletions managed/devops/opscli/ybops/cloud/azure/cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,10 @@ def edit_dns_record_set(self, dns_zone_id, domain_name_prefix, ip_list):
def delete_dns_record_set(self, dns_zone_id, domain_name_prefix):
return self.get_admin().delete_dns_record_set(dns_zone_id, domain_name_prefix)

def delete_volumes(self, args):
tags = json.loads(args.instance_tags) if args.instance_tags is not None else {}
return self.get_admin().delete_disks(tags)

def modify_tags(self, args):
instance = self.get_host_info(args)
if not instance:
Expand Down
6 changes: 1 addition & 5 deletions managed/devops/opscli/ybops/cloud/azure/method.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,11 +131,7 @@ def __init__(self, base_command):
super(AzureReplaceRootVolumeMethod, self).__init__(base_command)

def _mount_root_volume(self, host_info, volume):
curr_root_vol = host_info["root_volume"]
self.cloud.mount_disk(host_info, volume)
disk_del = self.cloud.get_admin().delete_disk(curr_root_vol)
disk_del.wait()
logging.info("[app] Successfully deleted old OS disk {}".format(curr_root_vol))

def _host_info_with_current_root_volume(self, args, host_info):
return (host_info, host_info.get("root_volume"))
Expand Down Expand Up @@ -312,7 +308,7 @@ def __init__(self, base_command):
super(AzureDeleteRootVolumesMethod, self).__init__(base_command)

def delete_volumes(self, args):
pass
self.cloud.delete_volumes(args)


class AzurePauseInstancesMethod(AbstractInstancesMethod):
Expand Down
20 changes: 20 additions & 0 deletions managed/devops/opscli/ybops/cloud/azure/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,26 @@ def update_os_disk(self, vm_name, os_disk):
def delete_disk(self, disk_name):
return self.compute_client.disks.begin_delete(RESOURCE_GROUP, os.path.basename(disk_name))

def delete_disks(self, tags):
if not tags:
raise YBOpsRuntimeError('Tags must be specified')
universe_uuid = tags.get('universe-uuid')
if universe_uuid is None:
raise YBOpsRuntimeError('Universe UUID must be specified')
node_uuid = tags.get('node-uuid')
if node_uuid is None:
raise YBOpsRuntimeError('Node UUID must be specified')
disk_list = self.compute_client.disks.list_by_resource_group(RESOURCE_GROUP)
if disk_list:
for disk in disk_list:
if (disk.disk_state == "Unattached" and disk.tags
and disk.tags.get('universe-uuid') == universe_uuid
and disk.tags.get('node-uuid') == node_uuid):
logging.info("[app] Deleting disk {}".format(disk.name))
disk_del = self.delete_disk(disk.name)
disk_del.wait()
logging.info("[app] Deleted disk {}".format(disk.name))

def tag_disks(self, vm, tags):
# Updating requires Disk as input rather than OSDisk. Retrieve Disk class with OSDisk name.
disk = self.compute_client.disks.get(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@
import com.yugabyte.yw.models.helpers.NodeDetails.NodeState;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -145,11 +145,26 @@ public void run() {
});
}

private void createVMImageUpgradeTasks(Set<NodeDetails> nodes) {
createRootVolumeCreationTasks(nodes).setSubTaskGroupType(getTaskSubGroupType());
private static class ImageSettings {
final String machineImage;
final String sshUserOverride;
final Integer sshPortOverride;
final UUID imageBundleUUID;

private ImageSettings(
String machineImage,
String sshUserOverride,
Integer sshPortOverride,
UUID imageBundleUUID) {
this.machineImage = machineImage;
this.sshUserOverride = sshUserOverride;
this.sshPortOverride = sshPortOverride;
this.imageBundleUUID = imageBundleUUID;
}
}

Map<UUID, UUID> clusterToImageBundleMap = new HashMap<>();
Universe universe = getUniverse();
private Map<NodeDetails, ImageSettings> getImageSettingsForNodes(Set<NodeDetails> nodes) {
Map<NodeDetails, ImageSettings> result = new LinkedHashMap<>();
UUID imageBundleUUID;
for (NodeDetails node : nodes) {
UUID region = taskParams().nodeToRegion.get(node.nodeUuid);
Expand Down Expand Up @@ -188,6 +203,25 @@ private void createVMImageUpgradeTasks(Set<NodeDetails> nodes) {
machineImage);
continue;
}
result.put(
node, new ImageSettings(machineImage, sshUserOverride, sshPortOverride, imageBundleUUID));
}
return result;
}

private void createVMImageUpgradeTasks(Set<NodeDetails> nodes) {
Map<NodeDetails, ImageSettings> imageSettingsMap = getImageSettingsForNodes(nodes);

createRootVolumeCreationTasks(imageSettingsMap).setSubTaskGroupType(getTaskSubGroupType());

Map<UUID, UUID> clusterToImageBundleMap = new HashMap<>();
Universe universe = getUniverse();
for (NodeDetails node : imageSettingsMap.keySet()) {
ImageSettings imageSettings = imageSettingsMap.get(node);
final UUID imageBundleUUID = imageSettings.imageBundleUUID;
final String sshUserOverride = imageSettings.sshUserOverride;
final Integer sshPortOverride = imageSettings.sshPortOverride;
final String machineImage = imageSettings.machineImage;
Set<UniverseTaskBase.ServerType> processTypes = new LinkedHashSet<>();
if (node.isMaster) {
processTypes.add(ServerType.MASTER);
Expand Down Expand Up @@ -310,49 +344,18 @@ private void createVMImageUpgradeTasks(Set<NodeDetails> nodes) {
.setSubTaskGroupType(getTaskSubGroupType());
}

private SubTaskGroup createRootVolumeCreationTasks(Collection<NodeDetails> nodes) {
private SubTaskGroup createRootVolumeCreationTasks(Map<NodeDetails, ImageSettings> settingsMap) {
Map<UUID, List<NodeDetails>> rootVolumesPerAZ =
nodes.stream().collect(Collectors.groupingBy(n -> n.azUuid));
settingsMap.keySet().stream().collect(Collectors.groupingBy(n -> n.azUuid));
SubTaskGroup subTaskGroup = createSubTaskGroup("CreateRootVolumes");
Universe universe = getUniverse();

rootVolumesPerAZ.forEach(
(key, value) -> {
NodeDetails node = value.get(0);
UUID region = taskParams().nodeToRegion.get(node.nodeUuid);
String updatedMachineImage = "";
if (taskParams().imageBundles != null && taskParams().imageBundles.size() > 0) {
UUID imageBundleUUID = retrieveImageBundleUUID(taskParams().imageBundles, node);
ImageBundle.NodeProperties toOverwriteNodeProperties =
imageBundleUtil.getNodePropertiesOrFail(
imageBundleUUID, node.cloudInfo.region, node.cloudInfo.cloud);
updatedMachineImage = toOverwriteNodeProperties.getMachineImage();
} else {
// Backward compatiblity.
updatedMachineImage = taskParams().machineImages.get(region);
}
final String machineImage = updatedMachineImage;
int numVolumes = value.size();

if (!taskParams().forceVMImageUpgrade) {
numVolumes =
(int)
value.stream()
.filter(
n -> {
String existingMachineImage = n.machineImage;
if (StringUtils.isBlank(existingMachineImage)) {
existingMachineImage = retreiveMachineImageForNode(n);
}
return !machineImage.equals(existingMachineImage);
})
.count();
}
ImageSettings imageSettings = settingsMap.get(node);

if (numVolumes == 0) {
log.info("Nothing to upgrade in AZ {}", node.cloudInfo.az);
return;
}
final String machineImage = imageSettings.machineImage;
int numVolumes = value.size();

CreateRootVolumes.Params params = new CreateRootVolumes.Params();
Cluster cluster = taskParams().getClusterByUuid(node.placementUuid);
Expand Down

0 comments on commit 2ba8180

Please sign in to comment.