
[e2e][backend] Add test cases for restore backup with snapshot created #1384

Open · wants to merge 8 commits into main
223 changes: 223 additions & 0 deletions harvester_e2e_tests/integrations/test_4_vm_backup_restore.py
@@ -636,6 +636,229 @@ def test_restore_replace_with_vm_shutdown_command(
f"Executed stderr: {err}"
)

@pytest.mark.dependency(depends=["TestBackupRestore::tests_backup_vm"], param=True)
def test_with_snapshot_restore_with_new_vm(
self, api_client, vm_shell_from_host, vm_checker, ssh_keypair, wait_timeout,
backup_config, base_vm_with_data
):
unique_vm_name, backup_data = base_vm_with_data['name'], base_vm_with_data['data']
pub_key, pri_key = ssh_keypair

vm_snapshot_name = unique_vm_name + '-snapshot'

# take vm snapshot
code, data = api_client.vm_snapshots.create(unique_vm_name, vm_snapshot_name)
assert 201 == code

deadline = datetime.now() + timedelta(seconds=wait_timeout)
while deadline > datetime.now():
code, data = api_client.vm_snapshots.get(vm_snapshot_name)
if data.get("status", {}).get("readyToUse"):
assert 200 == code
break
print(f"waiting for {vm_snapshot_name} to be ready")
Contributor: Drop temporary code.

Contributor (Author): Thanks for the check. The purpose of adding this readiness check for the VM snapshot is to ensure the snapshot is actually created before proceeding to the next step.

sleep(3)
else:
raise AssertionError(
f"timed out waiting for {vm_snapshot_name} to be ready:\n"
f"Status({code}): {data}"
)
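Note: a minimal sketch of how this readiness poll could be factored into a small helper to avoid the ad-hoc print (hypothetical helper, not part of this PR; it reuses datetime, timedelta, and sleep as already imported by this test module):

from datetime import datetime, timedelta  # mirrors the module's existing imports
from time import sleep                    # mirrors the module's existing imports

def wait_snapshot_ready(api_client, snapshot_name, timeout, interval=3):
    """Poll the VM snapshot until status.readyToUse turns True, or raise on timeout."""
    code, data = None, None
    deadline = datetime.now() + timedelta(seconds=timeout)
    while deadline > datetime.now():
        code, data = api_client.vm_snapshots.get(snapshot_name)
        if 200 == code and data.get("status", {}).get("readyToUse"):
            return code, data
        sleep(interval)
    raise AssertionError(
        f"timed out waiting for {snapshot_name} to be ready:\n"
        f"Status({code}): {data}"
    )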

vm_running, (code, data) = vm_checker.wait_status_running(unique_vm_name)
assert vm_running, (
f"Failed to restore VM({unique_vm_name}) with errors:\n"
f"Status({code}): {data}"
)

# Check VM is still running and can get IPs (vm and host)
vm_got_ips, (code, data) = vm_checker.wait_ip_addresses(unique_vm_name, ['default'])
assert vm_got_ips, (
f"Failed to check the VM({unique_vm_name}) is sill in running state:\n"
f"Status: {data.get('status')}\n"
f"API Status({code}): {data}"
)
vm_ip = next(iface['ipAddress'] for iface in data['status']['interfaces']
if iface['name'] == 'default')
code, data = api_client.hosts.get(data['status']['nodeName'])
host_ip = next(addr['address'] for addr in data['status']['addresses']
if addr['type'] == 'InternalIP')
base_vm_with_data['host_ip'], base_vm_with_data['vm_ip'] = host_ip, vm_ip

# mess up the existing data
with vm_shell_from_host(
base_vm_with_data['host_ip'], base_vm_with_data['vm_ip'],
base_vm_with_data['ssh_user'], pkey=pri_key
) as sh:
out, err = sh.exec_command(f"echo {pub_key!r} > {base_vm_with_data['data']['path']}")
assert not err, (out, err)
sh.exec_command('sync')
Contributor: Is the data-messing operation also needed for the restore-to-new case? Since it restores into a new VM, the messed data on the base VM will not be reverted anyway.
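Note: if the data-messing step is kept for the restore-to-new flow, a cross-check like the sketch below (hypothetical, not part of this PR; it reuses the fixtures above) would make its purpose explicit: the base VM keeps the overwritten content, while the restored VM is verified later in this test to still hold the backed-up content.

# Hypothetical cross-check: the base VM is untouched by restore-to-new, so it
# should still show the overwritten content written in the step above.
with vm_shell_from_host(
    base_vm_with_data['host_ip'], base_vm_with_data['vm_ip'],
    base_vm_with_data['ssh_user'], pkey=pri_key
) as sh:
    out, err = sh.exec_command(f"cat {base_vm_with_data['data']['path']}")
    assert pub_key in out, (out, err)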


# Restore VM into new
restored_vm_name = f"{backup_config[0].lower()}-restore-{unique_vm_name}-with-snapshot"
spec = api_client.backups.RestoreSpec.for_new(restored_vm_name)
code, data = api_client.backups.restore(unique_vm_name, spec)
assert 201 == code, (code, data)
vm_getable, (code, data) = vm_checker.wait_getable(restored_vm_name)
assert vm_getable, (code, data)

# Check VM Started then get IPs (vm and host)
vm_got_ips, (code, data) = vm_checker.wait_ip_addresses(restored_vm_name, ['default'])
assert vm_got_ips, (
f"Failed to Start VM({restored_vm_name}) with errors:\n"
f"Status: {data.get('status')}\n"
f"API Status({code}): {data}"
)
vm_ip = next(iface['ipAddress'] for iface in data['status']['interfaces']
if iface['name'] == 'default')
code, data = api_client.hosts.get(data['status']['nodeName'])
host_ip = next(addr['address'] for addr in data['status']['addresses']
if addr['type'] == 'InternalIP')
base_vm_with_data['host_ip'], base_vm_with_data['vm_ip'] = host_ip, vm_ip

# Login to the new VM and check data is existing
with vm_shell_from_host(host_ip, vm_ip, base_vm_with_data['ssh_user'], pkey=pri_key) as sh:
endtime = datetime.now() + timedelta(seconds=wait_timeout)
while endtime > datetime.now():
out, err = sh.exec_command('cloud-init status')
if 'done' in out:
break
sleep(3)
else:
raise AssertionError(
f"VM {restored_vm_name} Started {wait_timeout} seconds"
f", but cloud-init still in {out}"
)

out, err = sh.exec_command(f"cat {backup_data['path']}")

assert backup_data['content'] in out, (
f"cloud-init writefile failed\n"
f"Executed stdout: {out}\n"
f"Executed stderr: {err}"
)

# teardown: delete restored vm and volumes
code, data = api_client.vms.get(restored_vm_name)
vm_spec = api_client.vms.Spec.from_dict(data)
api_client.vms.delete(restored_vm_name)
endtime = datetime.now() + timedelta(seconds=wait_timeout)
while endtime > datetime.now():
code, data = api_client.vms.get(restored_vm_name)
if 404 == code:
break
sleep(3)
else:
raise AssertionError(
f"Failed to Delete VM({restored_vm_name}) with errors:\n"
f"Status({code}): {data}"
)
for vol in vm_spec.volumes:
vol_name = vol['volume']['persistentVolumeClaim']['claimName']
api_client.volumes.delete(vol_name)

@pytest.mark.dependency(depends=["TestBackupRestore::tests_backup_vm"], param=True)
def test_with_snapshot_restore_replace_retain_vols(
Contributor: Can we parameterize delete_volumes so we can test Delete Previous Volumes with both Delete and Retain?

Contributor (Author): It's a good idea to use a parameterized config to cover both the Delete and Retain test cases. However, according to test issue #1045, we only select "Restore backup to replace existing (retain volume)". The reason is that when the VM also has a snapshot created, even if we shut down the VM, the backend checks and prevents restoring to replace the existing VM with delete volumes. Thus here we test replace-existing with retain volume only.

Member: The snapshot function is based on the volume, so deleting the volume not working while a snapshot exists should be the expected behavior; maybe we can also discuss the case in the sync-up meeting to double-confirm it.

Contributor (Author): Thanks for the suggestion. After moving the snapshot restore test cases into the original TestBackupRestore, all test-generated volumes are cleaned up automatically once the tests complete successfully.
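Note: a minimal sketch of what the suggested parametrization could look like, with the delete-volumes case asserting the backend rejection described above (hypothetical and not part of this PR; the fixture wiring and the assumption that a rejected request returns a non-201 status are mine, and it reuses the module's existing pytest import):

# Hypothetical sketch: parameterize delete_volumes, expecting the backend to
# reject "replace existing with delete volumes" while a VM snapshot still exists.
@pytest.mark.parametrize("delete_volumes", [False, True], ids=["retain", "delete"])
def test_with_snapshot_restore_replace(self, api_client, base_vm_with_data, delete_volumes):
    unique_vm_name = base_vm_with_data['name']
    spec = api_client.backups.RestoreSpec.for_existing(delete_volumes=delete_volumes)
    code, data = api_client.backups.restore(unique_vm_name, spec)
    if delete_volumes:
        # Assumed: blocked while the snapshot exists, so anything but 201 is expected.
        assert 201 != code, (code, data)
    else:
        assert 201 == code, (code, data)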

self, api_client, vm_shell_from_host, ssh_keypair, wait_timeout, vm_checker,
backup_config, base_vm_with_data
):
unique_vm_name, backup_data = base_vm_with_data['name'], base_vm_with_data['data']
pub_key, pri_key = ssh_keypair

vm_snapshot_name = unique_vm_name + '-snapshot-retain'

# take vm snapshot
code, data = api_client.vm_snapshots.create(unique_vm_name, vm_snapshot_name)
assert 201 == code

deadline = datetime.now() + timedelta(seconds=wait_timeout)
while deadline > datetime.now():
code, data = api_client.vm_snapshots.get(vm_snapshot_name)
if data.get("status", {}).get("readyToUse"):
assert 200 == code
break
print(f"waiting for {vm_snapshot_name} to be ready")
Contributor: Same as previous.

Contributor (Author): Thanks for the suggestion. Added the code and data to the error message, consistent with the other assertions in this file.

sleep(3)
else:
raise AssertionError(
f"timed out waiting for {vm_snapshot_name} to be ready:\n"
f"Status({code}): {data}"
)

vm_running, (code, data) = vm_checker.wait_status_running(unique_vm_name)
assert vm_running, (
f"Failed to restore VM({unique_vm_name}) with errors:\n"
f"Status({code}): {data}"
)

# Check VM is still running and can get IPs (vm and host)
vm_got_ips, (code, data) = vm_checker.wait_ip_addresses(unique_vm_name, ['default'])
assert vm_got_ips, (
f"Failed to check the VM({unique_vm_name}) is sill in running state:\n"
f"Status: {data.get('status')}\n"
Contributor: Same as previous.

Contributor (Author): Thanks for the check. The VM is not restarted here; this just makes sure the VM is still running after taking the snapshot. Updated the assert error message accordingly.

f"API Status({code}): {data}"
)
vm_ip = next(iface['ipAddress'] for iface in data['status']['interfaces']
if iface['name'] == 'default')
code, data = api_client.hosts.get(data['status']['nodeName'])
host_ip = next(addr['address'] for addr in data['status']['addresses']
if addr['type'] == 'InternalIP')
base_vm_with_data['host_ip'], base_vm_with_data['vm_ip'] = host_ip, vm_ip

# mess up the existing data
with vm_shell_from_host(
base_vm_with_data['host_ip'], base_vm_with_data['vm_ip'],
base_vm_with_data['ssh_user'], pkey=pri_key
) as sh:
out, err = sh.exec_command(f"echo {pub_key!r} > {base_vm_with_data['data']['path']}")
assert not err, (out, err)
sh.exec_command('sync')

# Stop the VM then restore existing
vm_stopped, (code, data) = vm_checker.wait_stopped(unique_vm_name)
assert vm_stopped, (
f"Failed to Stop VM({unique_vm_name}) with errors:\n"
f"Status({code}): {data}"
)

spec = api_client.backups.RestoreSpec.for_existing(delete_volumes=False)
code, data = api_client.backups.restore(unique_vm_name, spec)
assert 201 == code, f'Failed to restore backup with current VM replaced, {data}'

vm_running, (code, data) = vm_checker.wait_status_running(unique_vm_name)
assert vm_running, (
f"Failed to restore VM({unique_vm_name}) with errors:\n"
f"Status({code}): {data}"
)

# Check VM Started then get IPs (vm and host)
vm_got_ips, (code, data) = vm_checker.wait_ip_addresses(unique_vm_name, ['default'])
assert vm_got_ips, (
f"Failed to Start VM({unique_vm_name}) with errors:\n"
f"Status: {data.get('status')}\n"
f"API Status({code}): {data}"
)
vm_ip = next(iface['ipAddress'] for iface in data['status']['interfaces']
if iface['name'] == 'default')
code, data = api_client.hosts.get(data['status']['nodeName'])
host_ip = next(addr['address'] for addr in data['status']['addresses']
if addr['type'] == 'InternalIP')
base_vm_with_data['host_ip'], base_vm_with_data['vm_ip'] = host_ip, vm_ip

# Login to the new VM and check data is existing
with vm_shell_from_host(host_ip, vm_ip, base_vm_with_data['ssh_user'], pkey=pri_key) as sh:
cloud_inited, (out, err) = vm_checker.wait_cloudinit_done(sh)
assert cloud_inited, (
f"VM {unique_vm_name} Started {wait_timeout} seconds"
f", but cloud-init still in {out}"
)
out, err = sh.exec_command(f"cat {backup_data['path']}")

assert backup_data['content'] in out, (
f"cloud-init writefile failed\n"
f"Executed stdout: {out}\n"
f"Executed stderr: {err}"
)


@pytest.mark.skip("https://github.com/harvester/harvester/issues/1473")
@pytest.mark.p0