From 6f7a3a5ed457b98d886a12685e968f52ea792e42 Mon Sep 17 00:00:00 2001
From: Daan de Goede
Date: Tue, 11 Jul 2023 11:23:29 +0200
Subject: [PATCH 01/17] Skip host-scoped pools and pools in maintenance

---
 live_migrate_virtual_machine.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/live_migrate_virtual_machine.py b/live_migrate_virtual_machine.py
index ebb48e6..609143a 100755
--- a/live_migrate_virtual_machine.py
+++ b/live_migrate_virtual_machine.py
@@ -171,6 +171,10 @@ def main(profile, zwps_to_cwps, migrate_offline_with_rsync, rsync_target_host, a
     storage_pools = sorted(target_cluster.get_storage_pools(), key=lambda h: h['disksizeused'])
 
     for storage_pool in storage_pools:
+        if storage_pool['scope'] == 'HOST':
+            continue
+        if storage_pool['state'] == 'Maintenance':
+            continue
         free_space_bytes = int(storage_pool['disksizetotal']) - int(storage_pool['disksizeused'])
         needed_bytes = volume['size'] * 1.5
         if needed_bytes >= free_space_bytes:

From 027bcc04a206d782d7db1bb2f6b34bf95c35c6ca Mon Sep 17 00:00:00 2001
From: Daan de Goede
Date: Thu, 13 Jul 2023 13:07:45 +0200
Subject: [PATCH 02/17] Fix for skipped volumes

---
 live_migrate_virtual_machine.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/live_migrate_virtual_machine.py b/live_migrate_virtual_machine.py
index 609143a..710b129 100755
--- a/live_migrate_virtual_machine.py
+++ b/live_migrate_virtual_machine.py
@@ -189,7 +189,7 @@ def main(profile, zwps_to_cwps, migrate_offline_with_rsync, rsync_target_host, a
 
         if volume['storage'] == target_storage_pool['name']:
             logging.warning(
-                f"Volume '{volume['name']}' ({volume['id']}) already on cluster '{target_cluster['name']}', skipping..")
+                f"Volume '{volume['name']}' ({volume['id']}) already on cluster '{target_cluster['name']}/{target_storage_pool['name']}', skipping..")
             volumes.pop(volume_id)
             continue
 
@@ -242,6 +242,9 @@ def main(profile, zwps_to_cwps, migrate_offline_with_rsync, rsync_target_host, a
             logging.error(f"Cannot migrate, VM has state: '{vm_instance['state']}'")
             sys.exit(1)
 
+        # Skip if we did not rsync the volume
+        if volume['id'] not in volume_destination_map:
+            continue
         target_storage_pool = volume_destination_map[volume['id']]['target_storage_pool']
         source_storage_pool = volume_destination_map[volume['id']]['source_storage_pool']
         source_host_id = volume_destination_map[volume['id']]['source_host_id']

From 2822709a507b8d46ab622281ee7932347b2537e5 Mon Sep 17 00:00:00 2001
From: Daan de Goede
Date: Thu, 13 Jul 2023 13:17:56 +0200
Subject: [PATCH 03/17] Add option to avoid a storage pool

---
 live_migrate_virtual_machine.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/live_migrate_virtual_machine.py b/live_migrate_virtual_machine.py
index 710b129..84042c6 100755
--- a/live_migrate_virtual_machine.py
+++ b/live_migrate_virtual_machine.py
@@ -36,6 +36,7 @@
 @click.option('--add-affinity-group', metavar='', help='Add this affinity group after migration')
 @click.option('--destination-dc', '-d', metavar='', help='Migrate to this datacenter')
 @click.option('--is-project-vm', is_flag=True, help='The specified VM is a project VM')
+@click.option('--avoid-storage-pool', default=None, help='Do not attempt to migrate to this storage pool')
 @click.option('--skip-backingfile-merge', is_flag=True, help='Do not attempt merge backing file')
 @click.option('--skip-within-cluster', is_flag=True, default=False, show_default=True,
               help='Enable/disable migration within cluster')
@@ -44,7 +45,7 @@
 @click.argument('vm')
 @click.argument('cluster')
 def main(profile, zwps_to_cwps, migrate_offline_with_rsync, rsync_target_host, add_affinity_group, destination_dc, is_project_vm,
-         skip_backingfile_merge, skip_within_cluster, dry_run, vm, cluster):
+         avoid_storage_pool, skip_backingfile_merge, skip_within_cluster, dry_run, vm, cluster):
     """Live migrate VM to CLUSTER"""

     """Unless --migrate-offline-with-rsync is passed, then we migrate offline"""
@@ -175,6 +176,8 @@ def main(profile, zwps_to_cwps, migrate_offline_with_rsync, rsync_target_host, a
             continue
         if storage_pool['state'] == 'Maintenance':
             continue
+        if storage_pool['name'] == avoid_storage_pool:
+            continue
         free_space_bytes = int(storage_pool['disksizetotal']) - int(storage_pool['disksizeused'])
         needed_bytes = volume['size'] * 1.5
         if needed_bytes >= free_space_bytes:
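Note: patches 01-03 together change how a target storage pool is chosen: host-scoped pools, pools in maintenance, and an explicitly avoided pool are skipped, and a candidate pool must have 1.5x the volume size free. A condensed sketch of the resulting selection logic (standalone, with hypothetical pool dictionaries; not part of the patch series itself):

    def select_target_pool(storage_pools, volume, avoid_storage_pool=None):
        # Pools are tried least-used first, mirroring the sort in the script.
        for pool in sorted(storage_pools, key=lambda p: p['disksizeused']):
            if pool['scope'] == 'HOST':  # host-local storage is never a target
                continue
            if pool['state'] == 'Maintenance':  # pool cannot take new volumes
                continue
            if pool['name'] == avoid_storage_pool:  # operator asked to skip it
                continue
            free_space_bytes = int(pool['disksizetotal']) - int(pool['disksizeused'])
            if volume['size'] * 1.5 < free_space_bytes:  # keep 50% headroom
                return pool
        return None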
From aa83b76f5f6fbea842829bd6a1623d6dd13cf648 Mon Sep 17 00:00:00 2001
From: Daan de Goede
Date: Thu, 13 Jul 2023 13:35:22 +0200
Subject: [PATCH 04/17] Fix timeout issue

---
 live_migrate_virtual_machine.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/live_migrate_virtual_machine.py b/live_migrate_virtual_machine.py
index 84042c6..92c68c2 100755
--- a/live_migrate_virtual_machine.py
+++ b/live_migrate_virtual_machine.py
@@ -285,12 +285,13 @@ def main(profile, zwps_to_cwps, migrate_offline_with_rsync, rsync_target_host, a
 
     # Start vm again if needed
     if auto_start_vm:
+        vm_instance = co.get_vm(name=vm, is_project_vm=is_project_vm)
         # Make sure status is stopped
         if not dry_run:
             retry_count = 0
             while vm_instance['state'] != 'Stopped':
                 logging.info(f"VM '{vm}' has state '{vm_instance['state']}': waiting for status 'Stopped'")
-                vm_instance = co.get_vm(name=vm, is_project_vm=is_project_vm)
+                vm_instance.refresh()
                 time.sleep(15)
                 retry_count += 1
                 if retry_count > 6:

From 0ecc547c74b059e771ecd4777c9ddc721f1ad59a Mon Sep 17 00:00:00 2001
From: Daan de Goede
Date: Thu, 13 Jul 2023 21:00:26 +0200
Subject: [PATCH 05/17] Fix skipping dedicated hosts and output

---
 cosmicops/objects/cluster.py    | 3 +++
 live_migrate_virtual_machine.py | 4 ++--
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/cosmicops/objects/cluster.py b/cosmicops/objects/cluster.py
index 7ad7a1e..9cd1c28 100644
--- a/cosmicops/objects/cluster.py
+++ b/cosmicops/objects/cluster.py
@@ -63,6 +63,9 @@ def find_migration_host(self, vm):
             if vm_on_dedicated_hv and not host['dedicated']:
                 continue
 
+            if not vm_on_dedicated_hv and host['dedicated']:
+                continue
+
             if vm_on_dedicated_hv and host['affinitygroupid'] != dedicated_affinity_id:
                 continue
 
diff --git a/live_migrate_virtual_machine.py b/live_migrate_virtual_machine.py
index 92c68c2..d963509 100755
--- a/live_migrate_virtual_machine.py
+++ b/live_migrate_virtual_machine.py
@@ -299,11 +299,11 @@ def main(profile, zwps_to_cwps, migrate_offline_with_rsync, rsync_target_host, a
 
         destination_host = target_cluster.find_migration_host(vm_instance)
         if not destination_host:
-            logging.error(f"Starting failed for VM '{vm_instance['state']}': no destination host found", log_to_slack=True)
+            logging.error(f"Starting failed for VM '{vm_instance['name']}': no destination host found", log_to_slack=True)
             sys.exit(1)
         # Start on a specific host to prevent unwanted migrations back to source
         if not vm_instance.start(destination_host):
-            logging.error(f"Starting failed for VM '{vm_instance['state']}'", log_to_slack=True)
+            logging.error(f"Starting failed for VM '{vm_instance['name']}'", log_to_slack=True)
             sys.exit(1)
 
     logging.info(f"VM Migration completed at {datetime.now().strftime('%d-%m-%Y %H:%M:%S')}\n")
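Note: the 'Fix timeout issue' change in patch 04 fetches the VM object once and then refreshes that same object while polling, instead of re-fetching it every iteration. The intended polling shape, roughly (assuming a CosmicVM-like object whose refresh() re-reads its state from the API):

    import time

    def wait_until_stopped(vm_instance, max_retries=6, interval=15):
        # Give the VM max_retries * interval seconds to reach 'Stopped'.
        retry_count = 0
        while vm_instance['state'] != 'Stopped':
            vm_instance.refresh()  # re-read state, same object each time
            time.sleep(interval)
            retry_count += 1
            if retry_count > max_retries:
                return False
        return True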
From 178fed843936cedf1c65a47b4f12246f65a0061f Mon Sep 17 00:00:00 2001
From: Daan de Goede
Date: Thu, 20 Jul 2023 17:01:48 +0200
Subject: [PATCH 06/17] Allow volume migration for routers

---
 cosmicops/objects/host.py               | 40 ++++++++++-----------
 cosmicops/ops.py                        |  8 ++---
 live_migrate_virtual_machine.py         |  2 +-
 live_migrate_virtual_machine_volumes.py | 46 ++++++++++++++++++-------
 4 files changed, 59 insertions(+), 37 deletions(-)

diff --git a/cosmicops/objects/host.py b/cosmicops/objects/host.py
index 2f5236d..4a5a02e 100644
--- a/cosmicops/objects/host.py
+++ b/cosmicops/objects/host.py
@@ -467,10 +467,10 @@ def wait_for_agent(self):
 
             time.sleep(5)
 
-    def get_disks(self, vm):
+    def get_disks(self, vm_instance):
         lv = libvirt.openReadOnly(f"qemu+tcp://{self['name']}/system")
 
-        domain = lv.lookupByName(vm['instancename'])
+        domain = lv.lookupByName(vm_instance)
 
         tree = ElementTree.fromstring(domain.XMLDesc())
         block_devs = tree.findall('devices/disk')
@@ -498,24 +498,24 @@ def get_disks(self, vm_instance):
 
         return disk_data
 
-    def get_domjobinfo(self, vm):
+    def get_domjobinfo(self, vm_instance):
         try:
             lv = libvirt.openReadOnly(f"qemu+tcp://{self['name']}/system")
             all_domains = lv.listAllDomains()
-            if any([x for x in all_domains if x.name() == vm]):
-                domain = lv.lookupByName(vm)
+            if any([x for x in all_domains if x.name() == vm_instance]):
+                domain = lv.lookupByName(vm_instance)
                 domjobinfo = domain.jobInfo()
                 return DomJobInfo.from_list(domjobinfo)
         except libvirt.libvirtError as _:
             pass  # Ignore exception
         return DomJobInfo()
 
-    def get_domjobstats(self, vm, correction=True):
+    def get_domjobstats(self, vm_instance, correction=True):
         try:
             lv = libvirt.openReadOnly(f"qemu+tcp://{self['name']}/system")
             all_domains = lv.listAllDomains()
-            if any([x for x in all_domains if x.name() == vm]):
-                domain = lv.lookupByName(vm)
+            if any([x for x in all_domains if x.name() == vm_instance]):
+                domain = lv.lookupByName(vm_instance)
                 domjobstats = domain.jobStats()
                 memory_total = domjobstats.get('memory_total', 0)
                 if correction:
@@ -541,14 +541,14 @@ def get_domjobstats(self, vm, correction=True):
             pass  # Ignore exception
         return DomJobInfo()
 
-    def get_blkjobinfo(self, vm, volume):
+    def get_blkjobinfo(self, vm_instance, volume):
         try:
-            disks = self.get_disks(vm)
+            disks = self.get_disks(vm_instance)
             disk = dict(filter(lambda x: x[0] == volume, disks.items()))
             lv = libvirt.openReadOnly(f"qemu+tcp://{self['name']}/system")
             all_domains = lv.listAllDomains()
-            if any([x for x in all_domains if x.name() == vm['instancename']]):
-                domain = lv.lookupByName(vm['instancename'])
+            if any([x for x in all_domains if x.name() == vm_instance]):
+                domain = lv.lookupByName(vm_instance)
                 blkjobinfo = domain.blockJobInfo(disk[volume]['dev'], 0)
                 return BlkJobInfo(
                     jobType=blkjobinfo.get('type', 0),
@@ -560,27 +560,27 @@ def get_blkjobinfo(self, vm, volume):
             pass  # Ignore exception
         return BlkJobInfo()
 
-    def set_iops_limit(self, vm, max_iops):
+    def set_iops_limit(self, vm_instance, max_iops):
         command = f"""
-        for i in $(/usr/bin/virsh domblklist --details '{vm['name']}' | grep disk | grep file | /usr/bin/awk '{{print $3}}'); do
-            /usr/bin/virsh blkdeviotune '{vm['name']}' $i --total-iops-sec {max_iops} --live
+        for i in $(/usr/bin/virsh domblklist --details '{vm_instance}' | grep disk | grep file | /usr/bin/awk '{{print $3}}'); do
+            /usr/bin/virsh blkdeviotune '{vm_instance}' $i --total-iops-sec {max_iops} --live
         done
         """
 
         if not self.execute(command, sudo=True).return_code == 0:
-            logging.error(f"Failed to set IOPS limit for '{vm['name']}'")
+            logging.error(f"Failed to set IOPS limit for '{vm_instance}'")
             return False
         else:
             return True
 
-    def merge_backing_files(self, vm):
+    def merge_backing_files(self, vm_instance):
         command = f"""
-        for i in $(/usr/bin/virsh domblklist --details '{vm['instancename']}' | grep disk | grep file | /usr/bin/awk '{{print $3}}'); do
-            /usr/bin/virsh blockpull '{vm['instancename']}' $i --wait --verbose
+        for i in $(/usr/bin/virsh domblklist --details '{vm_instance}' | grep disk | grep file | /usr/bin/awk '{{print $3}}'); do
+            /usr/bin/virsh blockpull '{vm_instance}' $i --wait --verbose
         done
         """
 
         if not self.execute(command, sudo=True).return_code == 0:
-            logging.error(f"Failed to merge backing volumes for '{vm['name']}'")
+            logging.error(f"Failed to merge backing volumes for '{vm_instance}'")
             return False
         else:
             return True
diff --git a/cosmicops/ops.py b/cosmicops/ops.py
index 1486411..72e6fe8 100644
--- a/cosmicops/ops.py
+++ b/cosmicops/ops.py
@@ -283,7 +283,7 @@ def wait_for_vm_migration_job(self, job_id, retries=10, domjobinfo=True, source_
             print()
         return status
 
-    def wait_for_volume_migration_job(self, volume_id, job_id, blkjobinfo=True, source_host=None, vm=None):
+    def wait_for_volume_migration_job(self, volume_id, job_id, blkjobinfo=True, source_host=None, vm_instance=None):
         prev_percentage = 0.
 
         # Hack to wait for job to start
@@ -294,8 +294,8 @@ def wait_for_volume_migration_job(self, volume_id, job_id, blkjobinfo=True, sour
                 logging.error(f"Error: Could not find volume '{volume_id}'")
                 return False
 
-            if blkjobinfo and source_host and vm:
-                blkjobinfo = source_host.get_blkjobinfo(vm, volume['path'])
+            if blkjobinfo and source_host and vm_instance:
+                blkjobinfo = source_host.get_blkjobinfo(vm_instance, volume['path'])
                 cur_percentage = float(blkjobinfo.current / (blkjobinfo.end or 1) * 100)
                 if cur_percentage > prev_percentage:
                     prev_percentage = cur_percentage
@@ -308,7 +308,7 @@ def wait_for_volume_migration_job(self, volume_id, job_id, blkjobinfo=True, sour
                     logging.debug(f"Volume '{volume_id}' is in {volume['state']} state and not Ready. Sleeping.")
         # Return result of job
         status = self.wait_for_job(job_id=job_id, retries=1)
-        if blkjobinfo and source_host and vm and status:
+        if blkjobinfo and source_host and vm_instance and status:
             print("100% ")
         else:
             print()
diff --git a/live_migrate_virtual_machine.py b/live_migrate_virtual_machine.py
index d963509..f94d2cf 100755
--- a/live_migrate_virtual_machine.py
+++ b/live_migrate_virtual_machine.py
@@ -553,7 +553,7 @@ def temp_migrate_volume(co, dry_run, log_to_slack, volume, vm, target_pool_name)
         f"Migrating volume '{volume['name']}' of VM '{vm['name']}' to pool '{target_pool_name}'",
         log_to_slack=log_to_slack)
 
-    if not volume.migrate(target_storage_pool, live_migrate=True, source_host=source_host, vm=vm):
+    if not volume.migrate(target_storage_pool, live_migrate=True, source_host=source_host, vm_instance=vm['instancename']):
         logging.error(f"Migration failed for volume '{volume['name']}' of VM '{vm['name']}'",
                       log_to_slack=log_to_slack)
         return False
diff --git a/live_migrate_virtual_machine_volumes.py b/live_migrate_virtual_machine_volumes.py
index 8d87847..6b1ac86 100755
--- a/live_migrate_virtual_machine_volumes.py
+++ b/live_migrate_virtual_machine_volumes.py
@@ -28,12 +28,13 @@
 @click.option('--max-iops', '-m', metavar='<# IOPS>', default=1000, show_default=True,
               help='Limit amount of IOPS used during migration, use 0 to disable')
 @click.option('--zwps-to-cwps', is_flag=True, help='Migrate from ZWPS to CWPS')
+@click.option('--is-router', is_flag=True, help='The specified VM is a router')
 @click.option('--is-project-vm', is_flag=True, help='The specified VM is a project VM')
 @click.option('--dry-run/--exec', is_flag=True, default=True, show_default=True, help='Enable/disable dry-run')
 @click_log.simple_verbosity_option(logging.getLogger(), default="INFO", show_default=True)
 @click.argument('vm')
 @click.argument('storage_pool')
-def main(profile, max_iops, zwps_to_cwps, is_project_vm, dry_run, vm, storage_pool):
+def main(profile, max_iops, zwps_to_cwps, is_router, is_project_vm, dry_run, vm, storage_pool):
     """Live migrate VM volumes to STORAGE_POOL"""
 
     click_log.basic_config()
@@ -50,12 +51,12 @@ def main(profile, max_iops, zwps_to_cwps, is_project_vm, dry_run, vm, storage_po
 
     cs = CosmicSQL(server=profile, dry_run=dry_run)
 
-    if not live_migrate_volumes(storage_pool, co, cs, dry_run, is_project_vm, log_to_slack, max_iops, vm,
+    if not live_migrate_volumes(storage_pool, co, cs, dry_run, is_router, is_project_vm, log_to_slack, max_iops, vm,
                                 zwps_to_cwps):
         sys.exit(1)
 
 
-def live_migrate_volumes(target_storage_pool_name, co, cs, dry_run, is_project_vm, log_to_slack, max_iops, vm_name, zwps_to_cwps):
+def live_migrate_volumes(target_storage_pool_name, co, cs, dry_run, is_router, is_project_vm, log_to_slack, max_iops, vm_name, zwps_to_cwps):
     target_storage_pool = co.get_storage_pool(name=target_storage_pool_name)
     if not target_storage_pool:
         return False
@@ -63,25 +64,41 @@ def live_migrate_volumes(target_storage_pool_name, co, cs, dry_run, is_project_v
     # disable setting max IOPS, if max_iops != 0
     set_max_iops = max_iops != 0
 
-    vm = co.get_vm(name=vm_name, is_project_vm=is_project_vm)
+    if is_router:
+        vm = co.get_router(name=vm_name, is_project_router=is_project_vm)
+    else:
+        vm = co.get_vm(name=vm_name, is_project_vm=is_project_vm)
     if not vm:
+        logging.error(f"Failed to find VM with name: '{vm_name}'", log_to_slack=False)
         return False
 
-    logging.instance_name = vm['instancename']
+    if is_router:
+        logging.instance_name = vm['name']
+        vm_instance = vm['name']
+    else:
+        logging.instance_name = vm['instancename']
+        vm_instance = vm['instancename']
     logging.slack_value = vm['domain']
     logging.vm_name = vm['name']
    logging.zone_name = vm['zonename']
 
+    if vm['state'] != 'Running':
+        logging.error(f"Failed, VM with name: '{vm_name}' is not in state 'Running'!", log_to_slack=False)
+        return False
+
+
     logging.info(
         f"Starting live migration of volumes of VM '{vm['name']}' to storage pool '{target_storage_pool['name']}' ({target_storage_pool['id']})",
         log_to_slack=log_to_slack)
 
     host = co.get_host(id=vm['hostid'])
     if not host:
+        logging.error(f"Failed to get host with host_id: '{vm['hostid']}'", log_to_slack=False)
         return False
 
     cluster = co.get_cluster(id=host['clusterid'])
     if not cluster:
+        logging.error(f"Failed to get cluster with cluster_id: '{host['clusterid']}'", log_to_slack=False)
         return False
 
     logging.cluster = cluster['name']
@@ -97,7 +114,7 @@ def live_migrate_volumes(target_storage_pool_name, co, cs, dry_run, is_project_v
             logging.info('Would have changed the diskoffering from ZWPS to CWPS of all ZWPS volumes')
 
     if not dry_run:
-        disk_info = host.get_disks(vm)
+        disk_info = host.get_disks(vm_instance)
         for path, disk_info in disk_info.items():
             _, path, _, _, size = cs.get_volume_size(path)
@@ -108,7 +125,7 @@ def live_migrate_volumes(target_storage_pool_name, co, cs, dry_run, is_project_v
     if set_max_iops:
         if not dry_run:
-            if not host.set_iops_limit(vm, max_iops):
+            if not host.set_iops_limit(vm_instance, max_iops):
                 return False
         else:
             logging.info(
@@ -118,9 +135,9 @@ def live_migrate_volumes(target_storage_pool_name, co, cs, dry_run, is_project_v
             f'Not setting an IOPS limit as it is disabled')
 
     if not dry_run:
-        if not host.merge_backing_files(vm):
+        if not host.merge_backing_files(vm_instance):
             if set_max_iops:
-                host.set_iops_limit(vm, 0)
+                host.set_iops_limit(vm_instance, 0)
             return False
     else:
         logging.info(
@@ -157,7 +174,7 @@ def live_migrate_volumes(target_storage_pool_name, co, cs, dry_run, is_project_v
         # get the source host to read the blkjobinfo
         source_host = co.get_host(id=vm['hostid'])
 
-        if not volume.migrate(target_storage_pool, live_migrate=True, source_host=source_host, vm=vm):
+        if not volume.migrate(target_storage_pool, live_migrate=True, source_host=source_host, vm_instance=vm_instance):
             continue
 
         with click_spinner.spinner():
@@ -178,8 +195,13 @@ def live_migrate_volumes(target_storage_pool_name, co, cs, dry_run, is_project_v
     logging.info(
         f"Finished live migration of volumes of VM '{vm['name']}' to storage pool '{target_storage_pool['name']}' ({target_storage_pool['id']})",
         log_to_slack=log_to_slack)
-    if not dry_run:
-        host.set_iops_limit(vm, 0)
+
+    if set_max_iops:
+        if not dry_run:
+            host.set_iops_limit(vm_instance, 0)
+        else:
+            logging.info(
+                f"Would have disabled the IOPS limit")
 
     return True
From db7a5130d5642a8e7c75ab317593201cee1a5e8c Mon Sep 17 00:00:00 2001
From: Daan de Goede
Date: Fri, 21 Jul 2023 09:07:45 +0200
Subject: [PATCH 07/17] Add source storage pool option for offline migrations

---
 migrate_offline_volumes.py | 74 ++++++++++++++++++++++++--------------
 1 file changed, 47 insertions(+), 27 deletions(-)

diff --git a/migrate_offline_volumes.py b/migrate_offline_volumes.py
index aa20b9c..a6bb73a 100755
--- a/migrate_offline_volumes.py
+++ b/migrate_offline_volumes.py
@@ -32,10 +32,11 @@
 @click.option('--dry-run/--exec', is_flag=True, default=True, show_default=True, help='Enable/disable dry-run')
 @click.option('--destination-cluster-name', help='Name of the destination cluster')
 @click.option('--destination-pool-name', help='Name of the destination pool')
+@click.option('--source-pool-name', help='Name of the source pool')
 @click_log.simple_verbosity_option(logging.getLogger(), default="INFO", show_default=True)
 @click.argument('source_cluster_name')
 def main(profile, dry_run, ignore_volumes, zwps_to_cwps, skip_disk_offerings, skip_domains, only_project,
-         source_cluster_name, destination_cluster_name, destination_pool_name):
+         source_cluster_name, destination_cluster_name, destination_pool_name, source_pool_name):
     """Migrate offline volumes from SOURCE_CLUSTER to DESTINATION_CLUSTER"""
 
     click_log.basic_config()
@@ -52,6 +53,10 @@ def main(profile, dry_run, ignore_volumes, zwps_to_cwps, skip_disk_offerings, sk
         logging.error('Destination cluster cannot be the source cluster!')
         sys.exit(1)
 
+    if source_pool_name == destination_pool_name:
+        logging.error('The destination pool cannot be the source pool!')
+        sys.exit(1)
+
     if dry_run:
         logging.warning('Running in dry-run mode, will only show changes')
@@ -59,11 +64,28 @@ def main(profile, dry_run, ignore_volumes, zwps_to_cwps, skip_disk_offerings, sk
     cs = CosmicSQL(server=profile, dry_run=dry_run)
 
     source_cluster = co.get_cluster(name=source_cluster_name)
-    source_storage_pools = co.get_all_storage_pools(name=source_cluster_name)
-    if not source_cluster and not source_storage_pools:
+    if not source_cluster:
         logging.error(f"Source cluster not found:'{source_cluster_name}'!")
         sys.exit(1)
 
+    if not source_pool_name:
+        try:
+            source_storage_pools = source_cluster.get_storage_pools(scope='CLUSTER')
+        except IndexError:
+            logging.error(f"No storage pools found for cluster '{source_cluster['name']}'")
+            sys.exit(1)
+    else:
+        source_storage_pool = co.get_storage_pool(name=source_pool_name)
+        if not source_storage_pool:
+            logging.error(f"Source storage pool not found '{source_pool_name}'")
+            sys.exit(1)
+        else:
+            if source_storage_pool['clustername'].upper() != source_cluster_name.upper():
+                logging.error(f"Source storage pool '{source_pool_name}' is not part of the source cluster '{source_cluster_name}'")
+                sys.exit(1)
+            source_storage_pools = [source_storage_pool]
+
+
     destination_cluster = None
     if destination_cluster_name:
         destination_cluster = co.get_cluster(name=destination_cluster_name)
@@ -71,17 +93,6 @@ def main(profile, dry_run, ignore_volumes, zwps_to_cwps, skip_disk_offerings, sk
             logging.error(f"Destination cluster not found:'{destination_cluster_name}'!")
             sys.exit(1)
 
-    if source_cluster:
-        try:
-            source_storage_pools = source_cluster.get_storage_pools(scope='CLUSTER')
-        except IndexError:
-            logging.error(f"No storage pools found for cluster '{source_cluster['name']}'")
-            sys.exit(1)
-
-        logging.info('Source storage pools found:')
-        for source_storage_pool in source_storage_pools:
-            logging.info(f" - '{source_storage_pool['name']}'")
-
     destination_storage_pools = None
     if destination_cluster:
         try:
@@ -89,9 +100,23 @@ def main(profile, dry_run, ignore_volumes, zwps_to_cwps, skip_disk_offerings, sk
             destination_storage_pools = destination_cluster.get_storage_pools(scope='CLUSTER')
         except IndexError:
             logging.error(f"No storage pools found for cluster '{destination_cluster['name']}'")
             sys.exit(1)
-        logging.info('Destination storage pools found:')
-        for destination_storage_pool in destination_storage_pools:
-            logging.info(f" - '{destination_storage_pool['name']}'")
+
+    if destination_pool_name:
+        destination_storage_pool = co.get_storage_pool(name=destination_pool_name)
+        if not destination_storage_pool:
+            logging.error(f"Destination storage pool not found '{destination_pool_name}'")
+            sys.exit(1)
+        else:
+            destination_storage_pools = [destination_storage_pool]
+
+    logging.info('Source storage pools found:')
+    for source_storage_pool in source_storage_pools:
+        if source_pool_name and source_storage_pool['name'] != source_pool_name:
+            continue
+        logging.info(f" - '{source_storage_pool['name']}'")
+    logging.info('Destination storage pools found:')
+    for destination_storage_pool in destination_storage_pools:
+        logging.info(f" - '{destination_storage_pool['name']}'")
 
     if ignore_volumes:
         ignore_volumes = ignore_volumes.replace(' ', '').split(',')
@@ -106,10 +131,9 @@ def main(profile, dry_run, ignore_volumes, zwps_to_cwps, skip_disk_offerings, sk
         logging.info(f"Skipping domains: {str(skip_domains)}")
 
     for source_storage_pool in source_storage_pools:
-        if destination_storage_pools is not None:
-            destination_storage_pool = choice(destination_storage_pools)
-        else:
-            destination_storage_pool = co.get_storage_pool(name=destination_pool_name)
+        if source_pool_name and source_storage_pool['name'] != source_pool_name:
+            continue
+        destination_storage_pool = choice(destination_storage_pools)
 
         volumes = source_storage_pool.get_volumes(only_project)
 
@@ -154,12 +178,8 @@ def main(profile, dry_run, ignore_volumes, zwps_to_cwps, skip_disk_offerings, sk
                 logging.info(
                     f"Would have changed the diskoffering for volume '{volume['name']}' to CWPS before starting the migration")
 
-            if source_cluster:
-                logging.info(
-                    f"Volume '{volume['name']}' will be migrated from cluster '{source_storage_pool['name']}' to '{destination_storage_pool['name']}'")
-            else:
-                logging.info(
-                    f"Volume '{volume['name']}' will be migrated from storage pool '{source_storage_pool['name']}' to '{destination_storage_pool['name']}'")
+        logging.info(
+            f"Volume '{volume['name']}' will be migrated from storage pool '{source_storage_pool['name']}' to '{destination_storage_pool['name']}'")
 
         if not volume.migrate(destination_storage_pool):
             continue
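Note: patch 07's source-pool handling boils down to: without --source-pool-name use every CLUSTER-scoped pool of the source cluster, otherwise use exactly one named pool that must belong to that cluster. As a sketch (using the same co API the patch uses; the exception type is an illustration, the script itself calls sys.exit):

    def resolve_source_pools(co, source_cluster, source_pool_name=None):
        if not source_pool_name:
            # All CLUSTER-scoped pools of the source cluster.
            return source_cluster.get_storage_pools(scope='CLUSTER')
        pool = co.get_storage_pool(name=source_pool_name)
        if not pool:
            raise LookupError(f"Source storage pool not found '{source_pool_name}'")
        if pool['clustername'].upper() != source_cluster['name'].upper():
            raise LookupError(
                f"'{source_pool_name}' is not part of cluster '{source_cluster['name']}'")
        return [pool]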
From ba03d706e75f32dce518d9a78c0eb0a14418e796 Mon Sep 17 00:00:00 2001
From: Daan de Goede
Date: Tue, 10 Oct 2023 10:43:19 +0200
Subject: [PATCH 08/17] Add multiple connectivity tests when waiting for host
 to come online

---
 cosmicops/objects/host.py | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/cosmicops/objects/host.py b/cosmicops/objects/host.py
index 4a5a02e..b469698 100644
--- a/cosmicops/objects/host.py
+++ b/cosmicops/objects/host.py
@@ -421,13 +421,17 @@ def wait_until_online(self):
         else:
             logging.info(f"Waiting for '{self['name']}' to come back online", self.log_to_slack)
         with click_spinner.spinner():
-            while True:
-                with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
-                    s.settimeout(5)
-                    result = s.connect_ex((self['name'], 22))
-
-                    if result == 0:
-                        break
+            tests = 0
+            while tests < 3:
+                while True:
+                    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+                        s.settimeout(5)
+                        result = s.connect_ex((self['name'], 22))
+
+                        if result == 0:
+                            break
+                time.sleep(20)
+                tests += 1
 
         if self.dry_run:
             logging.info(f"Would wait for libvirt on '{self['name']}'")
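Note: the nested loop added here is equivalent to requiring three successful connects to port 22, spaced 20 seconds apart, before the host counts as online; the inner while True keeps retrying a single connect until it succeeds. A flattened version of the same idea (standalone sketch, not the class method):

    import socket
    import time

    def wait_for_ssh(hostname, required_successes=3, delay=20):
        # The host counts as up only after `required_successes` connects to
        # port 22; failures simply retry, successes are spaced `delay`s apart.
        successes = 0
        while successes < required_successes:
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
                s.settimeout(5)
                if s.connect_ex((hostname, 22)) == 0:
                    successes += 1
                    if successes < required_successes:
                        time.sleep(delay)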
From 5bb0647596c0793a69ff5ba949ba105fd9097155 Mon Sep 17 00:00:00 2001
From: Remi Bergsma
Date: Thu, 24 Aug 2023 13:37:44 +0200
Subject: [PATCH 09/17] Fix scope, needs caps

---
 live_migrate_virtual_machine_volumes.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/live_migrate_virtual_machine_volumes.py b/live_migrate_virtual_machine_volumes.py
index 6b1ac86..92be34f 100755
--- a/live_migrate_virtual_machine_volumes.py
+++ b/live_migrate_virtual_machine_volumes.py
@@ -153,7 +153,7 @@ def live_migrate_volumes(target_storage_pool_name, co, cs, dry_run, is_router, i
         if not source_storage_pool:
             continue
 
-        if source_storage_pool['scope'] == 'Host' or (source_storage_pool['scope'] == 'ZONE' and not zwps_to_cwps):
+        if source_storage_pool['scope'] == 'HOST' or (source_storage_pool['scope'] == 'ZONE' and not zwps_to_cwps):
             logging.warning(f"Skipping volume '{volume['name']}' as it's scope is '{source_storage_pool['scope']}'",
                             log_to_slack=log_to_slack)
             continue

From 2a0933283811ac5479b2f495ff73ee58bbb9c57b Mon Sep 17 00:00:00 2001
From: Remi Bergsma
Date: Thu, 24 Aug 2023 14:12:41 +0200
Subject: [PATCH 10/17] Skip disks without a file

---
 cosmicops/objects/host.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/cosmicops/objects/host.py b/cosmicops/objects/host.py
index b469698..a95d793 100644
--- a/cosmicops/objects/host.py
+++ b/cosmicops/objects/host.py
@@ -487,6 +487,9 @@ def get_disks(self, vm_instance):
             dev = disk.find('target').get('dev')
             full_path = disk.find('source').get('file')
+            if full_path is None:
+                logging.info(f"Skipping disk without a file (NVMe?)")
+                continue
 
             _, _, pool, path = full_path.split('/')
             size, _, _ = domain.blockInfo(dev)

From 02147ea2003f50d180739b52295b231e5ed5cc4b Mon Sep 17 00:00:00 2001
From: Daan de Goede
Date: Tue, 10 Oct 2023 10:47:30 +0200
Subject: [PATCH 11/17] Add comment

---
 cosmicops/objects/host.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/cosmicops/objects/host.py b/cosmicops/objects/host.py
index a95d793..3117a85 100644
--- a/cosmicops/objects/host.py
+++ b/cosmicops/objects/host.py
@@ -421,6 +421,8 @@ def wait_until_online(self):
         else:
             logging.info(f"Waiting for '{self['name']}' to come back online", self.log_to_slack)
         with click_spinner.spinner():
+            # adding retry tests, so we need to be able to connect to SSH three times in one minute
+            # before we consider the host up
             tests = 0
             while tests < 3:

From e164f7b93d3b19ca9e2541d9d4bbb3d49bdc2dd1 Mon Sep 17 00:00:00 2001
From: Daan de Goede
Date: Tue, 10 Oct 2023 17:38:48 +0200
Subject: [PATCH 12/17] Add output when testing for SSH

---
 cosmicops/objects/host.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/cosmicops/objects/host.py b/cosmicops/objects/host.py
index 3117a85..e931be7 100644
--- a/cosmicops/objects/host.py
+++ b/cosmicops/objects/host.py
@@ -423,8 +423,9 @@ def wait_until_online(self):
         with click_spinner.spinner():
             # adding retry tests, so we need to be able to connect to SSH three times in one minute
             # before we consider the host up
-            tests = 0
-            while tests < 3:
+            tests = 1
+            logging.info(f"Waiting for SSH connection, attempt {tests} of 3", False)
+            while tests <= 3:
                 while True:
                     with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
                         s.settimeout(5)
From 029115bc4bbddf9e5e3c6cb4cfb57b467c380624 Mon Sep 17 00:00:00 2001
From: Remi Bergsma
Date: Wed, 11 Oct 2023 15:46:38 +0200
Subject: [PATCH 13/17] Specify target_host

---
 cosmicops/empty_host.py | 7 +++++--
 empty_host.py           | 5 +++--
 rolling_reboot.py       | 6 ++++--
 3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/cosmicops/empty_host.py b/cosmicops/empty_host.py
index f29b149..89c8b48 100644
--- a/cosmicops/empty_host.py
+++ b/cosmicops/empty_host.py
@@ -17,7 +17,7 @@
 from cosmicops import CosmicOps, RebootAction, logging
 
 
-def empty_host(profile, shutdown, skip_disable, dry_run, host):
+def empty_host(profile, shutdown, skip_disable, dry_run, host, target_host):
     click_log.basic_config()
 
     log_to_slack = True
@@ -35,7 +35,10 @@ def empty_host(profile, shutdown, skip_disable, dry_run, host):
         if not host.disable():
             raise RuntimeError(f"Failed to disable host '{host['name']}'")
 
-    (total, success, failed) = host.empty()
+    if target_host:
+        target_host = co.get_host(name=target_host)
+
+    (total, success, failed) = host.empty(target=target_host)
     result_message = f"Result: {success} successful, {failed} failed out of {total} total VMs"
 
     if not failed and shutdown:
diff --git a/empty_host.py b/empty_host.py
index b4effbd..3ca4601 100755
--- a/empty_host.py
+++ b/empty_host.py
@@ -30,7 +30,8 @@
 @click.option('--dry-run/--exec', is_flag=True, default=True, show_default=True, help='Enable/disable dry-run')
 @click_log.simple_verbosity_option(logging.getLogger(), default="INFO", show_default=True)
 @click.argument('host')
-def main(profile, shutdown, skip_disable, dry_run, host):
+@click.option('--target-host', help='Target hypervisor to migrate VMs to', required=False)
+def main(profile, shutdown, skip_disable, dry_run, host, target_host):
     """Empty HOST by migrating VMs to another host in the same cluster."""
 
     click_log.basic_config()
@@ -39,7 +40,7 @@ def main(profile, shutdown, skip_disable, dry_run, host):
         logging.info('Running in dry-run mode, will only show changes')
 
     try:
-        logging.info(empty_host(profile, shutdown, skip_disable, dry_run, host))
+        logging.info(empty_host(profile, shutdown, skip_disable, dry_run, host, target_host))
     except RuntimeError as err:
         logging.error(err)
         sys.exit(1)
diff --git a/rolling_reboot.py b/rolling_reboot.py
index 4c07af3..2859612 100755
--- a/rolling_reboot.py
+++ b/rolling_reboot.py
@@ -48,8 +48,9 @@
 @click.option('--dry-run/--exec', is_flag=True, default=True, show_default=True, help='Enable/disable dry-run')
 @click_log.simple_verbosity_option(logging.getLogger(), default="INFO", show_default=True)
 @click.argument('cluster')
+@click.argument('target_host')
 def main(profile, ignore_hosts, only_hosts, skip_os_version, reboot_action, pre_empty_script, post_empty_script,
-         post_reboot_script, dry_run, cluster):
+         post_reboot_script, dry_run, cluster, target_host):
     """Perform rolling reboot of hosts in CLUSTER"""
 
     click_log.basic_config()
@@ -90,7 +91,8 @@ def main(profile, ignore_hosts, only_hosts, skip_os_version, reboot_action, pre_
 
     hosts.sort(key=itemgetter('name'))
 
-    target_host = None
+    if target_host:
+        target_host = co.get_host(name=target_host)
     for host in hosts:
         logging.slack_value = host['name']
         logging.zone_name = host['zonename']
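Note: with patch 13 the empty-host flow resolves the optional target once and hands it to host.empty(); when no target is given, empty() keeps its previous behaviour and picks destination hosts itself. The call pattern, in short (names as in the patch; co.get_host() is assumed to return None for unknown hosts):

    # Inside empty_host(): resolve the optional --target-host by name,
    # then let Host.empty() fall back to auto-selection when it is None.
    if target_host:
        target_host = co.get_host(name=target_host)

    (total, success, failed) = host.empty(target=target_host)
    result_message = f"Result: {success} successful, {failed} failed out of {total} total VMs"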
is_project_vm, dry_run, vm, storage_pool): +def main(profile, max_iops, zwps_to_cwps, is_router, is_project_vm, dry_run, vm_name, storage_pool): """Live migrate VM volumes to STORAGE_POOL""" click_log.basic_config() @@ -51,7 +51,7 @@ def main(profile, max_iops, zwps_to_cwps, is_router, is_project_vm, dry_run, vm, cs = CosmicSQL(server=profile, dry_run=dry_run) - if not live_migrate_volumes(storage_pool, co, cs, dry_run, is_router, is_project_vm, log_to_slack, max_iops, vm, + if not live_migrate_volumes(storage_pool, co, cs, dry_run, is_router, is_project_vm, log_to_slack, max_iops, vm_name, zwps_to_cwps): sys.exit(1) diff --git a/migrate_offline_volumes.py b/migrate_offline_volumes.py index a6bb73a..db8366e 100755 --- a/migrate_offline_volumes.py +++ b/migrate_offline_volumes.py @@ -53,10 +53,6 @@ def main(profile, dry_run, ignore_volumes, zwps_to_cwps, skip_disk_offerings, sk logging.error('Destination cluster cannot be the source cluster!') sys.exit(1) - if source_pool_name == destination_pool_name: - logging.error('Destination cluster cannot be the source cluster!') - sys.exit(1) - if dry_run: logging.warning('Running in dry-run mode, will only show changes') diff --git a/tests/test_cosmichost.py b/tests/test_cosmichost.py index 05774b6..68a58ce 100644 --- a/tests/test_cosmichost.py +++ b/tests/test_cosmichost.py @@ -556,8 +556,8 @@ def test_wait_until_offline_dry_run(self): def test_wait_until_online(self): self.host.execute = Mock() - self.host.execute.side_effect = [Mock(return_code=1), Mock(return_code=0)] - self.socket_context.connect_ex.side_effect = [1, 0] + self.host.execute.side_effect = [Mock(return_code=1), Mock(return_code=0), Mock(return_code=1), Mock(return_code=0), Mock(return_code=1), Mock(return_code=0)] + self.socket_context.connect_ex.side_effect = [1, 0, 1, 0, 1, 0] self.host.wait_until_online() self.socket_context.connect_ex.assert_called_with(('host1', 22)) @@ -568,7 +568,7 @@ def test_wait_until_online_retry_on_failure(self): self.host.execute.side_effect = [Mock(return_code=1), ConnectionResetError, UnexpectedExit('mock unexpected exit'), CommandTimedOut('mock command timeout', 10), Mock(return_code=0)] - self.socket_context.connect_ex.side_effect = [1, 0] + self.socket_context.connect_ex.side_effect = [1, 0, 1, 0, 1, 0] self.host.wait_until_online() self.socket_context.connect_ex.assert_called_with(('host1', 22)) @@ -626,7 +626,7 @@ def test_get_disks(self): vm = CosmicVM(Mock(), { 'id': 'vm1', 'name': 'vm', - 'instancename': 'vm' + 'instancename': 'i-1-vm' }) xml_desc = """ @@ -672,22 +672,23 @@ def test_get_disks(self): domain.XMLDesc.return_value = xml_desc domain.blockInfo.return_value = (10737418240, 567148544, 567148544) - self.assertDictEqual(disk_data, self.host.get_disks(vm)) + self.assertDictEqual(disk_data, self.host.get_disks(vm['instancename'])) mock_libvirt.assert_called_with('qemu+tcp://host1/system') - mock_libvirt.return_value.lookupByName.assert_called_with(vm['name']) + mock_libvirt.return_value.lookupByName.assert_called_with(vm['instancename']) mock_libvirt.return_value.close.assert_called() def test_set_iops_limit(self): self.host.execute = Mock(return_value=Mock(return_code=0)) vm = CosmicVM(Mock(), { 'id': 'v1', - 'name': 'vm1' + 'name': 'vm1', + 'instancename': 'i-1-vm' }) - self.assertTrue(self.host.set_iops_limit(vm, 100)) + self.assertTrue(self.host.set_iops_limit(vm['instancename'], 100)) command = self.host.execute.call_args[0][0] - self.assertIn("--details 'vm1'", command) + self.assertIn("--details 'i-1-vm'", command) 
self.assertIn('--total-iops-sec 100', command) self.host.execute.return_value.return_code = 1 @@ -701,7 +702,7 @@ def test_merge_backing_files(self): 'instancename': 'i-1-VM' }) - self.assertTrue(self.host.merge_backing_files(vm)) + self.assertTrue(self.host.merge_backing_files(vm['instancename'])) command = self.host.execute.call_args[0][0] self.assertIn("--details 'i-1-VM'", command) self.assertIn("blockpull 'i-1-VM'", command) diff --git a/tests/test_live_migrate_virtual_machine.py b/tests/test_live_migrate_virtual_machine.py index 0ce71be..c14bc3f 100644 --- a/tests/test_live_migrate_virtual_machine.py +++ b/tests/test_live_migrate_virtual_machine.py @@ -355,7 +355,7 @@ def test_root_migration_to_zwps(self): self.co_instance.get_storage_pool.assert_has_calls([call(name='zwps_pool'), call(name='root_pool')]) self.root_volume.migrate.assert_called_with(self.zwps_storage_pool, live_migrate=True, - source_host=self.source_host, vm=self.vm) + source_host=self.source_host, vm_instance=self.vm['instancename']) def test_root_migration_to_zwps_dry_run(self): self.vm.get_volumes.return_value = [self.zwps_volume, self.root_volume] @@ -388,4 +388,4 @@ def test_root_migration_to_zwps_failure(self): self.co_instance.get_storage_pool.assert_called_with(name='zwps_pool') self.root_volume.migrate.assert_called_with(self.zwps_storage_pool, live_migrate=True, - source_host=self.source_host, vm=self.vm) + source_host=self.source_host, vm_instance=self.vm['instancename']) diff --git a/tests/test_live_migrate_virtual_machine_volumes.py b/tests/test_live_migrate_virtual_machine_volumes.py index ace591c..1781723 100644 --- a/tests/test_live_migrate_virtual_machine_volumes.py +++ b/tests/test_live_migrate_virtual_machine_volumes.py @@ -45,7 +45,8 @@ def _setup_mocks(self): 'zonename': 'zone', 'hostid': 'sh1', 'maintenancepolicy': 'LiveMigrate', - 'instancename': 'i-VM-1' + 'instancename': 'i-VM-1', + 'state': 'Running' }) self.host = CosmicHost(Mock(), { 'id': 'sh1', @@ -106,13 +107,13 @@ def test_main(self): self.co_instance.get_host.assert_called_with(id=self.vm['hostid']) self.co_instance.get_cluster.assert_called_with(id=self.host['clusterid']) self.cs_instance.update_zwps_to_cwps.assert_not_called() - self.host.get_disks.assert_called_with(self.vm) + self.host.get_disks.assert_called_with(self.vm['instancename']) self.cs_instance.get_volume_size.assert_called_with('path1') self.cs_instance.update_volume_size.assert_not_called() - self.host.set_iops_limit.assert_has_calls([call(self.vm, 1000), call(self.vm, 0)]) - self.host.merge_backing_files.assert_called_with(self.vm) + self.host.set_iops_limit.assert_has_calls([call(self.vm['instancename'], 1000), call(self.vm['instancename'], 0)]) + self.host.merge_backing_files.assert_called_with(self.vm['instancename']) self.vm.get_volumes.assert_called() - self.volume.migrate.assert_called_with(self.target_storage_pool, live_migrate=True, source_host=self.host, vm=self.vm) + self.volume.migrate.assert_called_with(self.target_storage_pool, live_migrate=True, source_host=self.host, vm_instance=self.vm['instancename']) self.volume.refresh.assert_called() def test_main_dry_run(self): @@ -186,7 +187,7 @@ def test_continues(self): self.volume.migrate.assert_not_called() self._setup_mocks() - self.source_storage_pool['scope'] = 'Host' + self.source_storage_pool['scope'] = 'HOST' self.assertEqual(0, self.runner.invoke(live_migrate_virtual_machine_volumes.main, ['--exec', '-p', 'profile', 'vm', 'target_pool']).exit_code) self.assertEqual(2, 
From 216a3b7e2cba89a66e465e6f41a2b33e6b4c27bb Mon Sep 17 00:00:00 2001
From: Daan de Goede
Date: Wed, 11 Oct 2023 20:18:58 +0200
Subject: [PATCH 15/17] Fix options for rolling_reboot

---
 empty_host.py     | 4 ++--
 rolling_reboot.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/empty_host.py b/empty_host.py
index 3ca4601..6c7c866 100755
--- a/empty_host.py
+++ b/empty_host.py
@@ -28,10 +28,10 @@
 @click.option('--shutdown', is_flag=True, help='Shutdown host when all VMs have been migrated')
 @click.option('--skip-disable', is_flag=True, help='Do not disable host before emptying it')
 @click.option('--dry-run/--exec', is_flag=True, default=True, show_default=True, help='Enable/disable dry-run')
+@click.option('--target-host', help='Target hypervisor to migrate VMs to', required=False)
 @click_log.simple_verbosity_option(logging.getLogger(), default="INFO", show_default=True)
 @click.argument('host')
-@click.option('--target-host', help='Target hypervisor to migrate VMs to', required=False)
-def main(profile, shutdown, skip_disable, dry_run, host, target_host):
+def main(profile, shutdown, skip_disable, dry_run, target_host, host):
     """Empty HOST by migrating VMs to another host in the same cluster."""
 
     click_log.basic_config()
diff --git a/rolling_reboot.py b/rolling_reboot.py
index 2859612..31b5d18 100755
--- a/rolling_reboot.py
+++ b/rolling_reboot.py
@@ -46,11 +46,11 @@
               help='Script to run on host after live migrations have completed')
 @click.option('--post-reboot-script', metavar='