Fix storage maintenance mode rebase #92

Merged · 17 commits · Oct 12, 2023
7 changes: 5 additions & 2 deletions cosmicops/empty_host.py
@@ -17,7 +17,7 @@
 from cosmicops import CosmicOps, RebootAction, logging


-def empty_host(profile, shutdown, skip_disable, dry_run, host):
+def empty_host(profile, shutdown, skip_disable, dry_run, host, target_host):
     click_log.basic_config()

     log_to_slack = True
@@ -35,7 +35,10 @@ def empty_host(profile, shutdown, skip_disable, dry_run, host):
     if not host.disable():
         raise RuntimeError(f"Failed to disable host '{host['name']}'")

-    (total, success, failed) = host.empty()
+    if target_host:
+        target_host = co.get_host(name=target_host)
+
+    (total, success, failed) = host.empty(target=target_host)
     result_message = f"Result: {success} successful, {failed} failed out of {total} total VMs"

     if not failed and shutdown:
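Note on the new lookup: co.get_host(name=...) presumably returns None when the name does not resolve, in which case host.empty(target=None) silently falls back to automatic host selection. A caller that wants to fail fast could guard the lookup; a minimal sketch (the helper name and error message are illustrative, not part of this PR):

def resolve_target_host(co, name):
    # No --target-host given: let host.empty() pick a host itself.
    if not name:
        return None
    target = co.get_host(name=name)
    if not target:
        # Fail fast instead of silently falling back to auto-selection.
        raise RuntimeError(f"Target host '{name}' not found")
    return target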
3 changes: 3 additions & 0 deletions cosmicops/objects/cluster.py
@@ -63,6 +63,9 @@ def find_migration_host(self, vm):
             if vm_on_dedicated_hv and not host['dedicated']:
                 continue

+            if not vm_on_dedicated_hv and host['dedicated']:
+                continue
+
             if vm_on_dedicated_hv and host['affinitygroupid'] != dedicated_affinity_id:
                 continue

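With this addition the dedicated-host filter in find_migration_host is symmetric: ordinary VMs are no longer offered dedicated hosts, and dedicated VMs still require a dedicated host in the matching affinity group. A self-contained restatement of the predicate (dict-shaped hosts as in the diff; the function name is illustrative):

def is_migration_candidate(host, vm_on_dedicated_hv, dedicated_affinity_id):
    # Dedicated VMs must stay on dedicated hosts.
    if vm_on_dedicated_hv and not host['dedicated']:
        return False
    # Ordinary VMs must not land on dedicated hosts (the new check).
    if not vm_on_dedicated_hv and host['dedicated']:
        return False
    # Dedicated VMs must also match the affinity group.
    if vm_on_dedicated_hv and host['affinitygroupid'] != dedicated_affinity_id:
        return False
    return True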
64 changes: 37 additions & 27 deletions cosmicops/objects/host.py
@@ -421,13 +421,20 @@ def wait_until_online(self):
         else:
             logging.info(f"Waiting for '{self['name']}' to come back online", self.log_to_slack)
             with click_spinner.spinner():
-                while True:
-                    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
-                        s.settimeout(5)
-                        result = s.connect_ex((self['name'], 22))
-
-                    if result == 0:
-                        break
+                # adding retry tests, so we need to be able to connect to SSH three times in one minute
+                # before we consider the host up
+                tests = 1
+                logging.info(f"Waiting for SSH connection, attempt {tests} of 3", False)
+                while tests <= 3:
+                    while True:
+                        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+                            s.settimeout(5)
+                            result = s.connect_ex((self['name'], 22))
+
+                        if result == 0:
+                            break
+                    time.sleep(20)
+                    tests += 1

         if self.dry_run:
             logging.info(f"Would wait for libvirt on '{self['name']}'")
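The new loop requires three successful TCP connects to port 22, spaced 20 seconds apart, so the host must stay reachable for roughly a minute before it counts as up; note the attempt number is logged once, before the loop, so the message always reads "attempt 1 of 3". A standalone sketch of the same policy (function name and defaults are illustrative):

import socket
import time

def wait_for_ssh(hostname, attempts=3, pause=20):
    for attempt in range(1, attempts + 1):
        # Poll until this attempt's connect succeeds.
        while True:
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
                s.settimeout(5)
                if s.connect_ex((hostname, 22)) == 0:
                    break
        if attempt < attempts:
            time.sleep(pause)  # spacing between consecutive successful connects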
@@ -467,10 +474,10 @@ def wait_for_agent(self):

         time.sleep(5)

-    def get_disks(self, vm):
+    def get_disks(self, vm_instancename):
         lv = libvirt.openReadOnly(f"qemu+tcp://{self['name']}/system")

-        domain = lv.lookupByName(vm['instancename'])
+        domain = lv.lookupByName(vm_instancename)

         tree = ElementTree.fromstring(domain.XMLDesc())
         block_devs = tree.findall('devices/disk')
@@ -483,6 +490,9 @@

             dev = disk.find('target').get('dev')
             full_path = disk.find('source').get('file')
+            if full_path is None:
+                logging.info(f"Skipping disk without a file (NVMe?)")
+                continue
             _, _, pool, path = full_path.split('/')

             size, _, _ = domain.blockInfo(dev)
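Background for the skip: in libvirt domain XML, file-backed disks carry <source file='...'>, while block-backed disks (NVMe passthrough, LUNs) carry <source dev='...'>, so .get('file') returns None for them and the path split below would raise. A runnable illustration (the XML and paths are made up):

from xml.etree import ElementTree

XML = """
<devices>
  <disk type='file'><source file='/mnt/1234-pool/abcd-vol'/><target dev='vda'/></disk>
  <disk type='block'><source dev='/dev/nvme0n1'/><target dev='vdb'/></disk>
</devices>
"""

for disk in ElementTree.fromstring(XML).findall('disk'):
    full_path = disk.find('source').get('file')
    if full_path is None:
        continue  # the same skip this PR adds
    _, _, pool, path = full_path.split('/')
    print(disk.find('target').get('dev'), pool, path)  # vda 1234-pool abcd-vol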
@@ -498,24 +508,24 @@

         return disk_data

-    def get_domjobinfo(self, vm):
+    def get_domjobinfo(self, vm_instancename):
         try:
             lv = libvirt.openReadOnly(f"qemu+tcp://{self['name']}/system")
             all_domains = lv.listAllDomains()
-            if any([x for x in all_domains if x.name() == vm]):
-                domain = lv.lookupByName(vm)
+            if any([x for x in all_domains if x.name() == vm_instancename]):
+                domain = lv.lookupByName(vm_instancename)
                 domjobinfo = domain.jobInfo()
                 return DomJobInfo.from_list(domjobinfo)
         except libvirt.libvirtError as _:
             pass # Ignore exception
         return DomJobInfo()

-    def get_domjobstats(self, vm, correction=True):
+    def get_domjobstats(self, vm_instancename, correction=True):
         try:
             lv = libvirt.openReadOnly(f"qemu+tcp://{self['name']}/system")
             all_domains = lv.listAllDomains()
-            if any([x for x in all_domains if x.name() == vm]):
-                domain = lv.lookupByName(vm)
+            if any([x for x in all_domains if x.name() == vm_instancename]):
+                domain = lv.lookupByName(vm_instancename)
                 domjobstats = domain.jobStats()
                 memory_total = domjobstats.get('memory_total', 0)
                 if correction:
@@ -541,14 +551,14 @@ def get_domjobstats(self, vm, correction=True):
             pass # Ignore exception
         return DomJobInfo()

-    def get_blkjobinfo(self, vm, volume):
+    def get_blkjobinfo(self, vm_instancename, volume):
         try:
-            disks = self.get_disks(vm)
+            disks = self.get_disks(vm_instancename)
             disk = dict(filter(lambda x: x[0] == volume, disks.items()))
             lv = libvirt.openReadOnly(f"qemu+tcp://{self['name']}/system")
             all_domains = lv.listAllDomains()
-            if any([x for x in all_domains if x.name() == vm['instancename']]):
-                domain = lv.lookupByName(vm['instancename'])
+            if any([x for x in all_domains if x.name() == vm_instancename]):
+                domain = lv.lookupByName(vm_instancename)
                 blkjobinfo = domain.blockJobInfo(disk[volume]['dev'], 0)
                 return BlkJobInfo(
                     jobType=blkjobinfo.get('type', 0),
@@ -560,27 +570,27 @@
             pass # Ignore exception
         return BlkJobInfo()

-    def set_iops_limit(self, vm, max_iops):
+    def set_iops_limit(self, vm_instancename, max_iops):
         command = f"""
-        for i in $(/usr/bin/virsh domblklist --details '{vm['name']}' | grep disk | grep file | /usr/bin/awk '{{print $3}}'); do
-            /usr/bin/virsh blkdeviotune '{vm['name']}' $i --total-iops-sec {max_iops} --live
+        for i in $(/usr/bin/virsh domblklist --details '{vm_instancename}' | grep disk | grep file | /usr/bin/awk '{{print $3}}'); do
+            /usr/bin/virsh blkdeviotune '{vm_instancename}' $i --total-iops-sec {max_iops} --live
         done
         """

         if not self.execute(command, sudo=True).return_code == 0:
-            logging.error(f"Failed to set IOPS limit for '{vm['name']}'")
+            logging.error(f"Failed to set IOPS limit for '{vm_instancename}'")
             return False
         else:
             return True

-    def merge_backing_files(self, vm):
+    def merge_backing_files(self, vm_instancename):
         command = f"""
-        for i in $(/usr/bin/virsh domblklist --details '{vm['instancename']}' | grep disk | grep file | /usr/bin/awk '{{print $3}}'); do
-            /usr/bin/virsh blockpull '{vm['instancename']}' $i --wait --verbose
+        for i in $(/usr/bin/virsh domblklist --details '{vm_instancename}' | grep disk | grep file | /usr/bin/awk '{{print $3}}'); do
+            /usr/bin/virsh blockpull '{vm_instancename}' $i --wait --verbose
         done
         """
         if not self.execute(command, sudo=True).return_code == 0:
-            logging.error(f"Failed to merge backing volumes for '{vm['name']}'")
+            logging.error(f"Failed to merge backing volumes for '{vm_instancename}'")
             return False
         else:
             return True
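Across host.py these helpers now consistently take the libvirt instance name (e.g. 'i-2-123-VM') instead of a VM dict; as a side effect, set_iops_limit now runs virsh against the instance name where it previously used vm['name'], which is typically the display name rather than the libvirt domain name. Call sites unpack the dict explicitly; a sketch (the function name, vm shape, and values are illustrative; host is a CosmicHost-like object):

def throttle_and_flatten(host, vm, max_iops=1000):
    # Pass the libvirt domain name, not the whole dict.
    instancename = vm['instancename']  # e.g. 'i-2-123-VM'
    host.set_iops_limit(instancename, max_iops)
    host.merge_backing_files(instancename)
    return host.get_disks(instancename)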
8 changes: 4 additions & 4 deletions cosmicops/ops.py
@@ -283,7 +283,7 @@ def wait_for_vm_migration_job(self, job_id, retries=10, domjobinfo=True, source_
             print()
         return status

-    def wait_for_volume_migration_job(self, volume_id, job_id, blkjobinfo=True, source_host=None, vm=None):
+    def wait_for_volume_migration_job(self, volume_id, job_id, blkjobinfo=True, source_host=None, vm_instancename=None):
         prev_percentage = 0.

         # Hack to wait for job to start
@@ -294,8 +294,8 @@ def wait_for_volume_migration_job(self, volume_id, job_id, blkjobinfo=True, sour
logging.error(f"Error: Could not find volume '{volume_id}'")
return False

if blkjobinfo and source_host and vm:
blkjobinfo = source_host.get_blkjobinfo(vm, volume['path'])
if blkjobinfo and source_host and vm_instancename:
blkjobinfo = source_host.get_blkjobinfo(vm_instancename, volume['path'])
cur_percentage = float(blkjobinfo.current / (blkjobinfo.end or 1) * 100)
if cur_percentage > prev_percentage:
prev_percentage = cur_percentage
@@ -308,7 +308,7 @@
             logging.debug(f"Volume '{volume_id}' is in {volume['state']} state and not Ready. Sleeping.")
         # Return result of job
         status = self.wait_for_job(job_id=job_id, retries=1)
-        if blkjobinfo and source_host and vm and status:
+        if blkjobinfo and source_host and vm_instancename and status:
             print("100% ")
         else:
             print()
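The progress line divides the block-job counters, with `or 1` guarding against `end == 0` before the job has started. A worked example of the arithmetic (the byte counts are made up):

# 5 GiB of 10 GiB copied -> 50%
current, end = 5_368_709_120, 10_737_418_240
print(f"{float(current / (end or 1) * 100):.0f}%")  # prints: 50%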
5 changes: 3 additions & 2 deletions empty_host.py
@@ -28,9 +28,10 @@
 @click.option('--shutdown', is_flag=True, help='Shutdown host when all VMs have been migrated')
 @click.option('--skip-disable', is_flag=True, help='Do not disable host before emptying it')
 @click.option('--dry-run/--exec', is_flag=True, default=True, show_default=True, help='Enable/disable dry-run')
+@click.option('--target-host', help='Target hypervisor to migrate VMs to', required=False)
 @click_log.simple_verbosity_option(logging.getLogger(), default="INFO", show_default=True)
 @click.argument('host')
-def main(profile, shutdown, skip_disable, dry_run, host):
+def main(profile, shutdown, skip_disable, dry_run, target_host, host):
     """Empty HOST by migrating VMs to another host in the same cluster."""

     click_log.basic_config()
@@ -39,7 +39,7 @@ def main(profile, shutdown, skip_disable, dry_run, host):
         logging.info('Running in dry-run mode, will only show changes')

     try:
-        logging.info(empty_host(profile, shutdown, skip_disable, dry_run, host))
+        logging.info(empty_host(profile, shutdown, skip_disable, dry_run, host, target_host))
     except RuntimeError as err:
         logging.error(err)
         sys.exit(1)
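With the new flag an operator can pin all migrations to one hypervisor instead of letting find_migration_host choose per VM; a hypothetical invocation (hostnames and profile name are illustrative):

./empty_host.py --profile config --exec --target-host hv-02 hv-01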