diff --git a/cosmicops/empty_host.py b/cosmicops/empty_host.py
index f29b149..89c8b48 100644
--- a/cosmicops/empty_host.py
+++ b/cosmicops/empty_host.py
@@ -17,7 +17,7 @@
 from cosmicops import CosmicOps, RebootAction, logging
 
 
-def empty_host(profile, shutdown, skip_disable, dry_run, host):
+def empty_host(profile, shutdown, skip_disable, dry_run, host, target_host):
     click_log.basic_config()
 
     log_to_slack = True
@@ -35,7 +35,10 @@ def empty_host(profile, shutdown, skip_disable, dry_run, host):
         if not host.disable():
             raise RuntimeError(f"Failed to disable host '{host['name']}'")
 
-    (total, success, failed) = host.empty()
+    if target_host:
+        target_host = co.get_host(name=target_host)
+
+    (total, success, failed) = host.empty(target=target_host)
     result_message = f"Result: {success} successful, {failed} failed out of {total} total VMs"
 
     if not failed and shutdown:
diff --git a/cosmicops/objects/cluster.py b/cosmicops/objects/cluster.py
index 7ad7a1e..9cd1c28 100644
--- a/cosmicops/objects/cluster.py
+++ b/cosmicops/objects/cluster.py
@@ -63,6 +63,9 @@ def find_migration_host(self, vm):
             if vm_on_dedicated_hv and not host['dedicated']:
                 continue
 
+            if not vm_on_dedicated_hv and host['dedicated']:
+                continue
+
             if vm_on_dedicated_hv and host['affinitygroupid'] != dedicated_affinity_id:
                 continue
 
diff --git a/cosmicops/objects/host.py b/cosmicops/objects/host.py
index 2f5236d..1308b13 100644
--- a/cosmicops/objects/host.py
+++ b/cosmicops/objects/host.py
@@ -421,13 +421,20 @@ def wait_until_online(self):
         else:
             logging.info(f"Waiting for '{self['name']}' to come back online", self.log_to_slack)
             with click_spinner.spinner():
-                while True:
-                    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
-                        s.settimeout(5)
-                        result = s.connect_ex((self['name'], 22))
-
-                    if result == 0:
-                        break
+                # adding retry tests, so we need to be able to connect to SSH three times in one minute
+                # before we consider the host up
+                tests = 1
+                logging.info(f"Waiting for SSH connection, attempt {tests} of 3", False)
+                while tests <= 3:
+                    while True:
+                        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+                            s.settimeout(5)
+                            result = s.connect_ex((self['name'], 22))
+
+                        if result == 0:
+                            break
+                    time.sleep(20)
+                    tests += 1
 
         if self.dry_run:
             logging.info(f"Would wait for libvirt on '{self['name']}'")
@@ -467,10 +474,10 @@ def wait_for_agent(self):
 
             time.sleep(5)
 
-    def get_disks(self, vm):
+    def get_disks(self, vm_instancename):
         lv = libvirt.openReadOnly(f"qemu+tcp://{self['name']}/system")
 
-        domain = lv.lookupByName(vm['instancename'])
+        domain = lv.lookupByName(vm_instancename)
 
         tree = ElementTree.fromstring(domain.XMLDesc())
         block_devs = tree.findall('devices/disk')
@@ -483,6 +490,9 @@ def get_disks(self, vm):
 
             dev = disk.find('target').get('dev')
             full_path = disk.find('source').get('file')
+            if full_path is None:
+                logging.info(f"Skipping disk without a file (NVMe?)")
+                continue
             _, _, pool, path = full_path.split('/')
 
             size, _, _ = domain.blockInfo(dev)
@@ -498,24 +508,24 @@ def get_disks(self, vm):
 
         return disk_data
 
-    def get_domjobinfo(self, vm):
+    def get_domjobinfo(self, vm_instancename):
         try:
             lv = libvirt.openReadOnly(f"qemu+tcp://{self['name']}/system")
             all_domains = lv.listAllDomains()
-            if any([x for x in all_domains if x.name() == vm]):
-                domain = lv.lookupByName(vm)
+            if any([x for x in all_domains if x.name() == vm_instancename]):
+                domain = lv.lookupByName(vm_instancename)
                 domjobinfo = domain.jobInfo()
                 return DomJobInfo.from_list(domjobinfo)
         except libvirt.libvirtError as _:
             pass  # Ignore exception
         return DomJobInfo()
 
-    def get_domjobstats(self, vm, correction=True):
+    def get_domjobstats(self, vm_instancename, correction=True):
         try:
             lv = libvirt.openReadOnly(f"qemu+tcp://{self['name']}/system")
             all_domains = lv.listAllDomains()
-            if any([x for x in all_domains if x.name() == vm]):
-                domain = lv.lookupByName(vm)
+            if any([x for x in all_domains if x.name() == vm_instancename]):
+                domain = lv.lookupByName(vm_instancename)
                 domjobstats = domain.jobStats()
                 memory_total = domjobstats.get('memory_total', 0)
                 if correction:
@@ -541,14 +551,14 @@ def get_domjobstats(self, vm, correction=True):
             pass  # Ignore exception
         return DomJobInfo()
 
-    def get_blkjobinfo(self, vm, volume):
+    def get_blkjobinfo(self, vm_instancename, volume):
         try:
-            disks = self.get_disks(vm)
+            disks = self.get_disks(vm_instancename)
             disk = dict(filter(lambda x: x[0] == volume, disks.items()))
             lv = libvirt.openReadOnly(f"qemu+tcp://{self['name']}/system")
             all_domains = lv.listAllDomains()
-            if any([x for x in all_domains if x.name() == vm['instancename']]):
-                domain = lv.lookupByName(vm['instancename'])
+            if any([x for x in all_domains if x.name() == vm_instancename]):
+                domain = lv.lookupByName(vm_instancename)
                 blkjobinfo = domain.blockJobInfo(disk[volume]['dev'], 0)
                 return BlkJobInfo(
                     jobType=blkjobinfo.get('type', 0),
@@ -560,27 +570,27 @@ def get_blkjobinfo(self, vm, volume):
             pass  # Ignore exception
         return BlkJobInfo()
 
-    def set_iops_limit(self, vm, max_iops):
+    def set_iops_limit(self, vm_instancename, max_iops):
         command = f"""
-        for i in $(/usr/bin/virsh domblklist --details '{vm['name']}' | grep disk | grep file | /usr/bin/awk '{{print $3}}'); do
-            /usr/bin/virsh blkdeviotune '{vm['name']}' $i --total-iops-sec {max_iops} --live
+        for i in $(/usr/bin/virsh domblklist --details '{vm_instancename}' | grep disk | grep file | /usr/bin/awk '{{print $3}}'); do
+            /usr/bin/virsh blkdeviotune '{vm_instancename}' $i --total-iops-sec {max_iops} --live
         done
         """
 
         if not self.execute(command, sudo=True).return_code == 0:
-            logging.error(f"Failed to set IOPS limit for '{vm['name']}'")
+            logging.error(f"Failed to set IOPS limit for '{vm_instancename}'")
             return False
         else:
             return True
 
-    def merge_backing_files(self, vm):
+    def merge_backing_files(self, vm_instancename):
         command = f"""
-        for i in $(/usr/bin/virsh domblklist --details '{vm['instancename']}' | grep disk | grep file | /usr/bin/awk '{{print $3}}'); do
-            /usr/bin/virsh blockpull '{vm['instancename']}' $i --wait --verbose
+        for i in $(/usr/bin/virsh domblklist --details '{vm_instancename}' | grep disk | grep file | /usr/bin/awk '{{print $3}}'); do
+            /usr/bin/virsh blockpull '{vm_instancename}' $i --wait --verbose
         done
         """
 
         if not self.execute(command, sudo=True).return_code == 0:
-            logging.error(f"Failed to merge backing volumes for '{vm['name']}'")
+            logging.error(f"Failed to merge backing volumes for '{vm_instancename}'")
             return False
         else:
             return True
diff --git a/cosmicops/ops.py b/cosmicops/ops.py
index 1486411..cd72761 100644
--- a/cosmicops/ops.py
+++ b/cosmicops/ops.py
@@ -283,7 +283,7 @@ def wait_for_vm_migration_job(self, job_id, retries=10, domjobinfo=True, source_
             print()
         return status
 
-    def wait_for_volume_migration_job(self, volume_id, job_id, blkjobinfo=True, source_host=None, vm=None):
+    def wait_for_volume_migration_job(self, volume_id, job_id, blkjobinfo=True, source_host=None, vm_instancename=None):
         prev_percentage = 0.
 
         # Hack to wait for job to start
@@ -294,8 +294,8 @@ def wait_for_volume_migration_job(self, volume_id, job_id, blkjobinfo=True, sour
                 logging.error(f"Error: Could not find volume '{volume_id}'")
                 return False
 
-            if blkjobinfo and source_host and vm:
-                blkjobinfo = source_host.get_blkjobinfo(vm, volume['path'])
+            if blkjobinfo and source_host and vm_instancename:
+                blkjobinfo = source_host.get_blkjobinfo(vm_instancename, volume['path'])
                 cur_percentage = float(blkjobinfo.current / (blkjobinfo.end or 1) * 100)
                 if cur_percentage > prev_percentage:
                     prev_percentage = cur_percentage
@@ -308,7 +308,7 @@ def wait_for_volume_migration_job(self, volume_id, job_id, blkjobinfo=True, sour
             logging.debug(f"Volume '{volume_id}' is in {volume['state']} state and not Ready. Sleeping.")
         # Return result of job
         status = self.wait_for_job(job_id=job_id, retries=1)
-        if blkjobinfo and source_host and vm and status:
+        if blkjobinfo and source_host and vm_instancename and status:
            print("100% ")
         else:
             print()
diff --git a/empty_host.py b/empty_host.py
index b4effbd..6c7c866 100755
--- a/empty_host.py
+++ b/empty_host.py
@@ -28,9 +28,10 @@
 @click.option('--shutdown', is_flag=True, help='Shutdown host when all VMs have been migrated')
 @click.option('--skip-disable', is_flag=True, help='Do not disable host before emptying it')
 @click.option('--dry-run/--exec', is_flag=True, default=True, show_default=True, help='Enable/disable dry-run')
+@click.option('--target-host', help='Target hypervisor to migrate VMs to', required=False)
 @click_log.simple_verbosity_option(logging.getLogger(), default="INFO", show_default=True)
 @click.argument('host')
-def main(profile, shutdown, skip_disable, dry_run, host):
+def main(profile, shutdown, skip_disable, dry_run, target_host, host):
     """Empty HOST by migrating VMs to another host in the same cluster."""
 
     click_log.basic_config()
@@ -39,7 +40,7 @@ def main(profile, shutdown, skip_disable, dry_run, host):
         logging.info('Running in dry-run mode, will only show changes')
 
     try:
-        logging.info(empty_host(profile, shutdown, skip_disable, dry_run, host))
+        logging.info(empty_host(profile, shutdown, skip_disable, dry_run, host, target_host))
     except RuntimeError as err:
         logging.error(err)
         sys.exit(1)
diff --git a/live_migrate_virtual_machine.py b/live_migrate_virtual_machine.py
index ebb48e6..06dd63b 100755
--- a/live_migrate_virtual_machine.py
+++ b/live_migrate_virtual_machine.py
@@ -36,15 +36,16 @@
 @click.option('--add-affinity-group', metavar='', help='Add this affinity group after migration')
 @click.option('--destination-dc', '-d', metavar='', help='Migrate to this datacenter')
 @click.option('--is-project-vm', is_flag=True, help='The specified VM is a project VM')
+@click.option('--avoid-storage-pool', default=None, help='Do not attempt to migrate to this storage pool')
 @click.option('--skip-backingfile-merge', is_flag=True, help='Do not attempt merge backing file')
 @click.option('--skip-within-cluster', is_flag=True, default=False, show_default=True,
               help='Enable/disable migration within cluster')
 @click.option('--dry-run/--exec', is_flag=True, default=True, show_default=True, help='Enable/disable dry-run')
 @click_log.simple_verbosity_option(logging.getLogger(), default="INFO", show_default=True)
-@click.argument('vm')
+@click.argument('vm-name')
 @click.argument('cluster')
 def main(profile, zwps_to_cwps, migrate_offline_with_rsync, rsync_target_host, add_affinity_group, destination_dc, is_project_vm,
-         skip_backingfile_merge, skip_within_cluster, dry_run, vm, cluster):
+         avoid_storage_pool, skip_backingfile_merge, skip_within_cluster, dry_run, vm_name, cluster):
     """Live migrate VM to CLUSTER"""
     """Unless --migrate-offline-with-rsync is passed, then we migrate offline"""
 
@@ -65,27 +66,27 @@ def main(profile, zwps_to_cwps, migrate_offline_with_rsync, rsync_target_host, a
     cs = CosmicSQL(server=profile, dry_run=dry_run)
 
     # Work around migration issue: first in the same pod to limit possible hiccup
-    vm_instance = co.get_vm(name=vm, is_project_vm=is_project_vm)
+    vm = co.get_vm(name=vm_name, is_project_vm=is_project_vm)
 
-    if not vm_instance:
-        logging.error(f"Cannot migrate, VM '{vm}' not found!")
+    if not vm:
+        logging.error(f"Cannot migrate, VM '{vm_name}' not found!")
         sys.exit(1)
 
     # Live migrate requires running VM. Unless migrate_offline_with_rsync==True, then we stop the VM as this is offline
     if not migrate_offline_with_rsync:
-        if not vm_instance['state'] == 'Running':
-            logging.error(f"Cannot migrate, VM has has state: '{vm_instance['state']}'")
+        if not vm['state'] == 'Running':
+            logging.error(f"Cannot migrate, VM has state: '{vm['state']}'")
             sys.exit(1)
 
-        source_host = co.get_host(id=vm_instance['hostid'])
+        source_host = co.get_host(id=vm['hostid'])
         source_cluster = co.get_cluster(id=source_host['clusterid'])
         if not skip_within_cluster:
-            if not vm_instance.migrate_within_cluster(vm=vm_instance, source_cluster=source_cluster,
-                                                      source_host=source_host, instancename=vm_instance):
+            if not vm.migrate_within_cluster(vm=vm, source_cluster=source_cluster,
+                                             source_host=source_host):
                 logging.info(f"VM Migration failed at {datetime.now().strftime('%d-%m-%Y %H:%M:%S')}\n")
                 sys.exit(1)
 
-        if not live_migrate(co, cs, cluster, vm, destination_dc, add_affinity_group, is_project_vm, zwps_to_cwps,
+        if not live_migrate(co, cs, cluster, vm_name, destination_dc, add_affinity_group, is_project_vm, zwps_to_cwps,
                             log_to_slack, dry_run):
             logging.info(f"VM Migration failed at {datetime.now().strftime('%d-%m-%Y %H:%M:%S')}\n")
             sys.exit(1)
@@ -98,20 +99,20 @@ def main(profile, zwps_to_cwps, migrate_offline_with_rsync, rsync_target_host, a
                       f" Example: --rsync-target-host {example_target}")
         sys.exit(1)
 
-    if vm_instance['state'] == 'Running':
+    if vm['state'] == 'Running':
         need_to_stop = True
         auto_start_vm = True
-    elif vm_instance['state'] == 'Stopped':
+    elif vm['state'] == 'Stopped':
         need_to_stop = False
         auto_start_vm = False
     else:
-        logging.error(f"Cannot migrate, VM '{vm}' should be in Running or Stopped state!", log_to_slack=True)
+        logging.error(f"Cannot migrate, VM '{vm_name}' should be in Running or Stopped state!", log_to_slack=True)
         sys.exit(1)
 
-    logging.info(f"VM Migration using rsync method starting for vm {vm_instance['name']}")
+    logging.info(f"VM Migration using rsync method starting for vm {vm['name']}")
 
-    if not vm_instance['state'] == 'Running' and not skip_backingfile_merge:
-        logging.error(f"Cannot migrate, VM has has state: '{vm_instance['state']}'. In order to merge backing"
+    if not vm['state'] == 'Running' and not skip_backingfile_merge:
+        logging.error(f"Cannot migrate, VM has state: '{vm['state']}'. In order to merge backing"
                       f" files, we need to have a Running VM. We will stop the VM later! You can also skip"
                       f" backing file merging by providing flag --skip-backingfile-merge")
         sys.exit(1)
@@ -121,23 +122,23 @@ def main(profile, zwps_to_cwps, migrate_offline_with_rsync, rsync_target_host, a
         logging.info(
             f"Skipping backing file merging due to --skip-backingfile-merge")
     else:
-        running_host = co.get_host(name=vm_instance['hostname'])
+        running_host = co.get_host(name=vm['hostname'])
         if not dry_run:
-            if not running_host.merge_backing_files(vm_instance):
+            if not running_host.merge_backing_files(vm):
                 return False
         else:
            logging.info(
                f"Would have merged all backing files if any exist on {running_host['name']}")
 
    if need_to_stop:
-        if not vm_instance.stop():
-            logging.error(f"Stopping failed for VM '{vm_instance['state']}'", log_to_slack=True)
+        if not vm.stop():
+            logging.error(f"Stopping failed for VM '{vm['state']}'", log_to_slack=True)
            sys.exit(1)
 
    # Manually set migrating state to prevent unwanted VM starts
-    if not cs.set_vm_state(instance_name=vm_instance['instancename'], status_name='Migrating'):
-        logging.error(f"Cannot set status to Migrating for VM '{vm}'!", log_to_slack=True)
+    if not cs.set_vm_state(instance_name=vm['instancename'], status_name='Migrating'):
+        logging.error(f"Cannot set status to Migrating for VM '{vm_name}'!", log_to_slack=True)
        sys.exit(1)
 
    # Here our VM is stopped
 
@@ -161,7 +162,7 @@ def main(profile, zwps_to_cwps, migrate_offline_with_rsync, rsync_target_host, a
 
     logging.debug(f"Found target hosts: {target_host}")
 
-    volumes = vm_instance.get_volumes()
+    volumes = vm.get_volumes()
     volume_id = 0
     volume_counter = 0
     volume_destination_map = {}
@@ -171,6 +172,12 @@ def main(profile, zwps_to_cwps, migrate_offline_with_rsync, rsync_target_host, a
         storage_pools = sorted(target_cluster.get_storage_pools(), key=lambda h: h['disksizeused'])
 
         for storage_pool in storage_pools:
+            if storage_pool['scope'] == 'HOST':
+                continue
+            if storage_pool['state'] == 'Maintenance':
+                continue
+            if storage_pool['name'] == avoid_storage_pool:
+                continue
             free_space_bytes = int(storage_pool['disksizetotal']) - int(storage_pool['disksizeused'])
             needed_bytes = volume['size'] * 1.5
             if needed_bytes >= free_space_bytes:
@@ -185,7 +192,7 @@ def main(profile, zwps_to_cwps, migrate_offline_with_rsync, rsync_target_host, a
 
         if volume['storage'] == target_storage_pool['name']:
             logging.warning(
-                f"Volume '{volume['name']}' ({volume['id']}) already on cluster '{target_cluster['name']}', skipping..")
+                f"Volume '{volume['name']}' ({volume['id']}) already on cluster '{target_cluster['name']}/{target_storage_pool['name']}', skipping..")
             volumes.pop(volume_id)
             continue
 
@@ -233,11 +240,14 @@ def main(profile, zwps_to_cwps, migrate_offline_with_rsync, rsync_target_host, a
     # Finally, move volumes in place and update the db
     for volume in volumes:
         # Check if VM is still stopped
-        vm_instance = co.get_vm(name=vm, is_project_vm=is_project_vm)
-        if not dry_run and vm_instance['state'] != 'Migrating':
-            logging.error(f"Cannot migrate, VM has state: '{vm_instance['state']}'")
+        vm = co.get_vm(name=vm_name, is_project_vm=is_project_vm)
+        if not dry_run and vm['state'] != 'Migrating':
+            logging.error(f"Cannot migrate, VM has state: '{vm['state']}'")
             sys.exit(1)
 
+        # skip if we did not rsync the volume
+        if volume['id'] not in volume_destination_map:
+            continue
         target_storage_pool = volume_destination_map[volume['id']]['target_storage_pool']
         source_storage_pool = volume_destination_map[volume['id']]['source_storage_pool']
         source_host_id = volume_destination_map[volume['id']]['source_host_id']
@@ -269,30 +279,31 @@ def main(profile, zwps_to_cwps, migrate_offline_with_rsync, rsync_target_host, a
                          sudo=True, hide_stdout=False, pty=True)
 
     # Reset custom state back to Stopped
-    if not cs.set_vm_state(instance_name=vm_instance['instancename'], status_name='Stopped'):
-        logging.error(f"Cannot set status to Stopped for VM '{vm}'!", log_to_slack=True)
+    if not cs.set_vm_state(instance_name=vm['instancename'], status_name='Stopped'):
+        logging.error(f"Cannot set status to Stopped for VM '{vm_name}'!", log_to_slack=True)
         sys.exit(1)
 
     # Start vm again if needed
     if auto_start_vm:
+        vm = co.get_vm(name=vm_name, is_project_vm=is_project_vm)
         # Make sure status is stopped
         if not dry_run:
             retry_count = 0
-            while vm_instance['state'] != 'Stopped':
-                logging.info(f"VM '{vm}' has state '{vm_instance['state']}': waiting for status 'Stopped'")
-                vm_instance = co.get_vm(name=vm, is_project_vm=is_project_vm)
+            while vm['state'] != 'Stopped':
+                logging.info(f"VM '{vm_name}' has state '{vm['state']}': waiting for status 'Stopped'")
+                vm.refresh()
                 time.sleep(15)
                 retry_count += 1
                 if retry_count > 6:
                     break
 
-        destination_host = target_cluster.find_migration_host(vm_instance)
+        destination_host = target_cluster.find_migration_host(vm)
         if not destination_host:
-            logging.error(f"Starting failed for VM '{vm_instance['state']}': no destination host found", log_to_slack=True)
+            logging.error(f"Starting failed for VM '{vm['name']}': no destination host found", log_to_slack=True)
             sys.exit(1)
         # Start on a specific host to prevent unwanted migrations back to source
-        if not vm_instance.start(destination_host):
-            logging.error(f"Starting failed for VM '{vm_instance['state']}'", log_to_slack=True)
+        if not vm.start(destination_host):
+            logging.error(f"Starting failed for VM '{vm['name']}'", log_to_slack=True)
             sys.exit(1)
 
     logging.info(f"VM Migration completed at {datetime.now().strftime('%d-%m-%Y %H:%M:%S')}\n")
@@ -542,7 +553,7 @@ def temp_migrate_volume(co, dry_run, log_to_slack, volume, vm, target_pool_name)
         f"Migrating volume '{volume['name']}' of VM '{vm['name']}' to pool '{target_pool_name}'",
         log_to_slack=log_to_slack)
 
-    if not volume.migrate(target_storage_pool, live_migrate=True, source_host=source_host, vm=vm):
+    if not volume.migrate(target_storage_pool, live_migrate=True, source_host=source_host, vm_instancename=vm['instancename']):
         logging.error(f"Migration failed for volume '{volume['name']}' of VM '{vm['name']}'",
                       log_to_slack=log_to_slack)
         return False
diff --git a/live_migrate_virtual_machine_volumes.py b/live_migrate_virtual_machine_volumes.py
index 8d87847..3910597 100755
--- a/live_migrate_virtual_machine_volumes.py
+++ b/live_migrate_virtual_machine_volumes.py
@@ -28,12 +28,13 @@
 @click.option('--max-iops', '-m', metavar='<# IOPS>', default=1000, show_default=True,
               help='Limit amount of IOPS used during migration, use 0 to disable')
 @click.option('--zwps-to-cwps', is_flag=True, help='Migrate from ZWPS to CWPS')
+@click.option('--is-router', is_flag=True, help='The specified VM is a router')
 @click.option('--is-project-vm', is_flag=True, help='The specified VM is a project VM')
 @click.option('--dry-run/--exec', is_flag=True, default=True, show_default=True, help='Enable/disable dry-run')
 @click_log.simple_verbosity_option(logging.getLogger(), default="INFO", show_default=True)
-@click.argument('vm')
+@click.argument('vm-name')
 @click.argument('storage_pool')
-def main(profile, max_iops, zwps_to_cwps, is_project_vm, dry_run, vm, storage_pool):
+def main(profile, max_iops, zwps_to_cwps, is_router, is_project_vm, dry_run, vm_name, storage_pool):
     """Live migrate VM volumes to STORAGE_POOL"""
 
     click_log.basic_config()
@@ -50,12 +51,12 @@ def main(profile, max_iops, zwps_to_cwps, is_project_vm, dry_run, vm, storage_po
 
     cs = CosmicSQL(server=profile, dry_run=dry_run)
 
-    if not live_migrate_volumes(storage_pool, co, cs, dry_run, is_project_vm, log_to_slack, max_iops, vm,
+    if not live_migrate_volumes(storage_pool, co, cs, dry_run, is_router, is_project_vm, log_to_slack, max_iops, vm_name,
                                 zwps_to_cwps):
         sys.exit(1)
 
 
-def live_migrate_volumes(target_storage_pool_name, co, cs, dry_run, is_project_vm, log_to_slack, max_iops, vm_name, zwps_to_cwps):
+def live_migrate_volumes(target_storage_pool_name, co, cs, dry_run, is_router, is_project_vm, log_to_slack, max_iops, vm_name, zwps_to_cwps):
     target_storage_pool = co.get_storage_pool(name=target_storage_pool_name)
     if not target_storage_pool:
         return False
@@ -63,25 +64,41 @@ def live_migrate_volumes(target_storage_pool_name, co, cs, dry_run, is_project_v
     # disable setting max IOPS, if max_iops != 0
     set_max_iops = max_iops != 0
 
-    vm = co.get_vm(name=vm_name, is_project_vm=is_project_vm)
+    if is_router:
+        vm = co.get_router(name=vm_name, is_project_router=is_project_vm)
+    else:
+        vm = co.get_vm(name=vm_name, is_project_vm=is_project_vm)
     if not vm:
+        logging.error(f"Failed to find VM with name: '{vm_name}'", log_to_slack=False)
         return False
 
-    logging.instance_name = vm['instancename']
+    if is_router:
+        logging.instance_name = vm['name']
+        vm_instancename = vm['name']
+    else:
+        logging.instance_name = vm['instancename']
+        vm_instancename = vm['instancename']
     logging.slack_value = vm['domain']
     logging.vm_name = vm['name']
     logging.zone_name = vm['zonename']
 
+    if vm['state'] != 'Running':
+        logging.error(f"Failed, VM with name: '{vm_name}' is not in state 'Running'!", log_to_slack=False)
+        return False
+
+
     logging.info(
         f"Starting live migration of volumes of VM '{vm['name']}' to storage pool '{target_storage_pool['name']}' ({target_storage_pool['id']})",
         log_to_slack=log_to_slack)
 
     host = co.get_host(id=vm['hostid'])
     if not host:
+        logging.error(f"Failed to get host with host_id: '{vm['hostid']}'", log_to_slack=False)
         return False
 
     cluster = co.get_cluster(id=host['clusterid'])
     if not cluster:
+        logging.error(f"Failed to get cluster with cluster_id: '{host['clusterid']}'", log_to_slack=False)
         return False
 
     logging.cluster = cluster['name']
@@ -97,7 +114,7 @@ def live_migrate_volumes(target_storage_pool_name, co, cs, dry_run, is_project_v
            logging.info('Would have changed the diskoffering from ZWPS to CWPS of all ZWPS volumes')
 
     if not dry_run:
-        disk_info = host.get_disks(vm)
+        disk_info = host.get_disks(vm_instancename)
         for path, disk_info in disk_info.items():
             _, path, _, _, size = cs.get_volume_size(path)
 
@@ -108,7 +125,7 @@ def live_migrate_volumes(target_storage_pool_name, co, cs, dry_run, is_project_v
 
     if set_max_iops:
         if not dry_run:
-            if not host.set_iops_limit(vm, max_iops):
+            if not host.set_iops_limit(vm_instancename, max_iops):
                 return False
         else:
             logging.info(
@@ -118,9 +135,9 @@ def live_migrate_volumes(target_storage_pool_name, co, cs, dry_run, is_project_v
                f'Not setting an IOPS limit as it is disabled')
 
     if not dry_run:
-        if not host.merge_backing_files(vm):
+        if not host.merge_backing_files(vm_instancename):
             if set_max_iops:
-                host.set_iops_limit(vm, 0)
+                host.set_iops_limit(vm_instancename, 0)
             return False
     else:
         logging.info(
@@ -136,7 +153,7 @@ def live_migrate_volumes(target_storage_pool_name, co, cs, dry_run, is_project_v
         if not source_storage_pool:
            continue
 
-        if source_storage_pool['scope'] == 'Host' or (source_storage_pool['scope'] == 'ZONE' and not zwps_to_cwps):
+        if source_storage_pool['scope'] == 'HOST' or (source_storage_pool['scope'] == 'ZONE' and not zwps_to_cwps):
             logging.warning(f"Skipping volume '{volume['name']}' as it's scope is '{source_storage_pool['scope']}'",
                             log_to_slack=log_to_slack)
             continue
@@ -157,7 +174,7 @@ def live_migrate_volumes(target_storage_pool_name, co, cs, dry_run, is_project_v
 
         # get the source host to read the blkjobinfo
         source_host = co.get_host(id=vm['hostid'])
-        if not volume.migrate(target_storage_pool, live_migrate=True, source_host=source_host, vm=vm):
+        if not volume.migrate(target_storage_pool, live_migrate=True, source_host=source_host):
             continue
 
         with click_spinner.spinner():
@@ -178,8 +195,13 @@ def live_migrate_volumes(target_storage_pool_name, co, cs, dry_run, is_project_v
 
     logging.info(
         f"Finished live migration of volumes of VM '{vm['name']}' to storage pool '{target_storage_pool['name']}' ({target_storage_pool['id']})",
         log_to_slack=log_to_slack)
-    if not dry_run:
-        host.set_iops_limit(vm, 0)
+
+    if set_max_iops:
+        if not dry_run:
+            host.set_iops_limit(vm_instancename, 0)
+        else:
+            logging.info(
+                f"Would have disabled the IOPS limit")
 
     return True
diff --git a/migrate_offline_volumes.py b/migrate_offline_volumes.py
index aa20b9c..db8366e 100755
--- a/migrate_offline_volumes.py
+++ b/migrate_offline_volumes.py
@@ -32,10 +32,11 @@
 @click.option('--dry-run/--exec', is_flag=True, default=True, show_default=True, help='Enable/disable dry-run')
 @click.option('--destination-cluster-name', help='Name of the destination cluster')
 @click.option('--destination-pool-name', help='Name of the destination pool')
+@click.option('--source-pool-name', help='Name of the source pool')
 @click_log.simple_verbosity_option(logging.getLogger(), default="INFO", show_default=True)
 @click.argument('source_cluster_name')
 def main(profile, dry_run, ignore_volumes, zwps_to_cwps, skip_disk_offerings, skip_domains, only_project,
-         source_cluster_name, destination_cluster_name, destination_pool_name):
+         source_cluster_name, destination_cluster_name, destination_pool_name, source_pool_name):
     """Migrate offline volumes from SOURCE_CLUSTER to DESTINATION_CLUSTER"""
 
     click_log.basic_config()
@@ -59,11 +60,28 @@ def main(profile, dry_run, ignore_volumes, zwps_to_cwps, skip_disk_offerings, sk
     cs = CosmicSQL(server=profile, dry_run=dry_run)
 
     source_cluster = co.get_cluster(name=source_cluster_name)
-    source_storage_pools = co.get_all_storage_pools(name=source_cluster_name)
-    if not source_cluster and not source_storage_pools:
+    if not source_cluster:
         logging.error(f"Source cluster not found:'{source_cluster_name}'!")
         sys.exit(1)
 
+    if not source_pool_name:
+        try:
+            source_storage_pools = source_cluster.get_storage_pools(scope='CLUSTER')
+        except IndexError:
+            logging.error(f"No storage pools found for cluster '{source_cluster['name']}'")
+            sys.exit(1)
+    else:
+        source_storage_pool = co.get_storage_pool(name=source_pool_name)
+        if not source_storage_pool:
+            logging.error(f"Source storage pool not found '{source_pool_name}'")
+            sys.exit(1)
+        else:
+            if source_storage_pool['clustername'].upper() != source_cluster_name.upper():
+                logging.error(f"Source storage pool '{source_pool_name}' is not part of the source cluster '{source_cluster_name}'")
+                sys.exit(1)
+            source_storage_pools = [source_storage_pool]
+
+
     destination_cluster = None
     if destination_cluster_name:
         destination_cluster = co.get_cluster(name=destination_cluster_name)
@@ -71,17 +89,6 @@ def main(profile, dry_run, ignore_volumes, zwps_to_cwps, skip_disk_offerings, sk
         logging.error(f"Destination cluster not found:'{destination_cluster_name}'!")
         sys.exit(1)
 
-    if source_cluster:
-        try:
-            source_storage_pools = source_cluster.get_storage_pools(scope='CLUSTER')
-        except IndexError:
-            logging.error(f"No storage pools found for cluster '{source_cluster['name']}'")
-            sys.exit(1)
-
-    logging.info('Source storage pools found:')
-    for source_storage_pool in source_storage_pools:
-        logging.info(f" - '{source_storage_pool['name']}'")
-
     destination_storage_pools = None
     if destination_cluster:
         try:
@@ -89,9 +96,23 @@ def main(profile, dry_run, ignore_volumes, zwps_to_cwps, skip_disk_offerings, sk
         except IndexError:
             logging.error(f"No storage pools found for cluster '{destination_cluster['name']}'")
             sys.exit(1)
-        logging.info('Destination storage pools found:')
-        for destination_storage_pool in destination_storage_pools:
-            logging.info(f" - '{destination_storage_pool['name']}'")
+
+    if destination_pool_name:
+        destination_storage_pool = co.get_storage_pool(name=destination_pool_name)
+        if not destination_storage_pool:
+            logging.error(f"Destination storage pool not found '{destination_pool_name}'")
+            sys.exit(1)
+        else:
+            destination_storage_pools = [destination_storage_pool]
+
+    logging.info('Source storage pools found:')
+    for source_storage_pool in source_storage_pools:
+        if source_pool_name and source_storage_pool['name'] != source_pool_name:
+            continue
+        logging.info(f" - '{source_storage_pool['name']}'")
+    logging.info('Destination storage pools found:')
+    for destination_storage_pool in destination_storage_pools:
+        logging.info(f" - '{destination_storage_pool['name']}'")
 
     if ignore_volumes:
         ignore_volumes = ignore_volumes.replace(' ', '').split(',')
@@ -106,10 +127,9 @@ def main(profile, dry_run, ignore_volumes, zwps_to_cwps, skip_disk_offerings, sk
     logging.info(f"Skipping domains: {str(skip_domains)}")
 
     for source_storage_pool in source_storage_pools:
-        if destination_storage_pools is not None:
-            destination_storage_pool = choice(destination_storage_pools)
-        else:
-            destination_storage_pool = co.get_storage_pool(name=destination_pool_name)
+        if source_pool_name and source_storage_pool['name'] != source_pool_name:
+            continue
+        destination_storage_pool = choice(destination_storage_pools)
 
         volumes = source_storage_pool.get_volumes(only_project)
 
@@ -154,12 +174,8 @@ def main(profile, dry_run, ignore_volumes, zwps_to_cwps, skip_disk_offerings, sk
                 logging.info(
                     f"Would have changed the diskoffering for volume '{volume['name']}' to CWPS before starting the migration")
 
-            if source_cluster:
-                logging.info(
-                    f"Volume '{volume['name']}' will be migrated from cluster '{source_storage_pool['name']}' to '{destination_storage_pool['name']}'")
-            else:
-                logging.info(
-                    f"Volume '{volume['name']}' will be migrated from storage pool '{source_storage_pool['name']}' to '{destination_storage_pool['name']}'")
+            logging.info(
+                f"Volume '{volume['name']}' will be migrated from storage pool '{source_storage_pool['name']}' to '{destination_storage_pool['name']}'")
 
             if not volume.migrate(destination_storage_pool):
                 continue
diff --git a/rolling_reboot.py b/rolling_reboot.py
index 4c07af3..31b5d18 100755
--- a/rolling_reboot.py
+++ b/rolling_reboot.py
@@ -46,10 +46,11 @@
               help='Script to run on host after live migrations have completed')
 @click.option('--post-reboot-script', metavar='