From eae912a4771737188285bf152fd0d766f8853df0 Mon Sep 17 00:00:00 2001 From: davidtclin Date: Fri, 12 Jul 2024 18:20:51 +0800 Subject: [PATCH 1/8] Add test case for restore backup from vm have snapshot created --- .../integrations/test_4_vm_backup_restore.py | 203 ++++++++++++++++++ 1 file changed, 203 insertions(+) diff --git a/harvester_e2e_tests/integrations/test_4_vm_backup_restore.py b/harvester_e2e_tests/integrations/test_4_vm_backup_restore.py index 784010c1e..71c5b9f4b 100644 --- a/harvester_e2e_tests/integrations/test_4_vm_backup_restore.py +++ b/harvester_e2e_tests/integrations/test_4_vm_backup_restore.py @@ -560,6 +560,209 @@ def test_restore_with_invalid_name(self, api_client, backup_config, base_vm_with code, data = api_client.backups.restore(unique_vm_name, spec) assert 422 == code, (code, data) + @pytest.mark.dependency(depends=["TestBackupRestore::tests_backup_vm"], param=True) + def test_with_snapshot_restore_with_new_vm( + self, api_client, vm_shell_from_host, vm_checker, ssh_keypair, wait_timeout, + backup_config, base_vm_with_data + ): + unique_vm_name, backup_data = base_vm_with_data['name'], base_vm_with_data['data'] + pub_key, pri_key = ssh_keypair + + vm_backup_name = unique_vm_name + '-backup-new' + + # Create backup with the name as VM's name + code, data = api_client.vms.backup(unique_vm_name, vm_backup_name) + assert 204 == code, (code, data) + # Check backup is ready + endtime = datetime.now() + timedelta(seconds=wait_timeout) + while endtime > datetime.now(): + code, backup = api_client.backups.get(vm_backup_name) + if 200 == code and backup.get('status', {}).get('readyToUse'): + break + sleep(3) + else: + raise AssertionError( + f'Timed-out waiting for the backup \'{vm_backup_name}\' to be ready.' + ) + + vm_snapshot_name = unique_vm_name + '-snapshot-new' + # take vm snapshot + code, data = api_client.vm_snapshots.create(unique_vm_name, vm_snapshot_name) + assert 201 == code + + deadline = datetime.now() + timedelta(seconds=wait_timeout) + while deadline > datetime.now(): + code, data = api_client.vm_snapshots.get(vm_snapshot_name) + if data.get("status", {}).get("readyToUse"): + break + print(f"waiting for {vm_snapshot_name} to be ready") + sleep(3) + else: + raise AssertionError(f"timed out waiting for {vm_snapshot_name} to be ready") + + assert 200 == code + assert data.get("status", {}).get("readyToUse") is True + + # mess up the existing data + with vm_shell_from_host( + base_vm_with_data['host_ip'], base_vm_with_data['vm_ip'], + base_vm_with_data['ssh_user'], pkey=pri_key + ) as sh: + out, err = sh.exec_command(f"echo {pub_key!r} > {base_vm_with_data['data']['path']}") + assert not err, (out, err) + sh.exec_command('sync') + + # Restore VM into new + restored_vm_name = f"{backup_config[0].lower()}-restore-{unique_vm_name}" + spec = api_client.backups.RestoreSpec.for_new(restored_vm_name) + code, data = api_client.backups.restore(unique_vm_name, spec) + assert 201 == code, (code, data) + + # Check VM Started then get IPs (vm and host) + vm_got_ips, (code, data) = vm_checker.wait_ip_addresses(restored_vm_name, ['default']) + assert vm_got_ips, ( + f"Failed to Start VM({restored_vm_name}) with errors:\n" + f"Status: {data.get('status')}\n" + f"API Status({code}): {data}" + ) + vm_ip = next(iface['ipAddress'] for iface in data['status']['interfaces'] + if iface['name'] == 'default') + code, data = api_client.hosts.get(data['status']['nodeName']) + host_ip = next(addr['address'] for addr in data['status']['addresses'] + if addr['type'] == 'InternalIP') + + 
# Login to the new VM and check data is existing
+        with vm_shell_from_host(host_ip, vm_ip, base_vm_with_data['ssh_user'], pkey=pri_key) as sh:
+            endtime = datetime.now() + timedelta(seconds=wait_timeout)
+            while endtime > datetime.now():
+                out, err = sh.exec_command('cloud-init status')
+                if 'done' in out:
+                    break
+                sleep(3)
+            else:
+                raise AssertionError(
+                    f"VM {restored_vm_name} Started {wait_timeout} seconds"
+                    f", but cloud-init still in {out}"
+                )
+
+            out, err = sh.exec_command(f"cat {backup_data['path']}")
+
+        assert backup_data['content'] in out, (
+            f"cloud-init writefile failed\n"
+            f"Executed stdout: {out}\n"
+            f"Executed stderr: {err}"
+        )
+
+        # teardown: delete restored vm and volumes
+        code, data = api_client.vms.get(restored_vm_name)
+        vm_spec = api_client.vms.Spec.from_dict(data)
+        api_client.vms.delete(restored_vm_name)
+        endtime = datetime.now() + timedelta(seconds=wait_timeout)
+        while endtime > datetime.now():
+            code, data = api_client.vms.get(restored_vm_name)
+            if 404 == code:
+                break
+            sleep(3)
+        else:
+            raise AssertionError(
+                f"Failed to Delete VM({restored_vm_name}) with errors:\n"
+                f"Status({code}): {data}"
+            )
+        for vol in vm_spec.volumes:
+            vol_name = vol['volume']['persistentVolumeClaim']['claimName']
+            api_client.volumes.delete(vol_name)
+
+    @pytest.mark.dependency(depends=["TestBackupRestore::tests_backup_vm"], param=True)
+    def test_with_snapshot_restore_replace_retain_vols(
+        self, api_client, vm_shell_from_host, ssh_keypair, wait_timeout, vm_checker,
+        backup_config, base_vm_with_data
+    ):
+        unique_vm_name, backup_data = base_vm_with_data['name'], base_vm_with_data['data']
+        pub_key, pri_key = ssh_keypair
+
+        vm_backup_name = unique_vm_name + '-backup-replace'
+
+        # Create backup with the name as VM's name
+        code, data = api_client.vms.backup(unique_vm_name, vm_backup_name)
+        assert 204 == code, (code, data)
+        # Check backup is ready
+        endtime = datetime.now() + timedelta(seconds=wait_timeout)
+        while endtime > datetime.now():
+            code, backup = api_client.backups.get(vm_backup_name)
+            if 200 == code and backup.get('status', {}).get('readyToUse'):
+                break
+            sleep(3)
+        else:
+            raise AssertionError(
+                f'Timed-out waiting for the backup \'{vm_backup_name}\' to be ready.'
+ ) + + vm_snapshot_name = unique_vm_name + '-snapshot-replace' + # take vm snapshot + code, data = api_client.vm_snapshots.create(unique_vm_name, vm_snapshot_name) + assert 201 == code + + deadline = datetime.now() + timedelta(seconds=wait_timeout) + while deadline > datetime.now(): + code, data = api_client.vm_snapshots.get(vm_snapshot_name) + if data.get("status", {}).get("readyToUse"): + break + print(f"waiting for {vm_snapshot_name} to be ready") + sleep(3) + else: + raise AssertionError(f"timed out waiting for {vm_snapshot_name} to be ready") + + assert 200 == code + assert data.get("status", {}).get("readyToUse") is True + + # mess up the existing data + with vm_shell_from_host( + base_vm_with_data['host_ip'], base_vm_with_data['vm_ip'], + base_vm_with_data['ssh_user'], pkey=pri_key + ) as sh: + out, err = sh.exec_command(f"echo {pub_key!r} > {base_vm_with_data['data']['path']}") + assert not err, (out, err) + sh.exec_command('sync') + + # Stop the VM then restore existing + vm_stopped, (code, data) = vm_checker.wait_stopped(unique_vm_name) + assert vm_stopped, ( + f"Failed to Stop VM({unique_vm_name}) with errors:\n" + f"Status({code}): {data}" + ) + + spec = api_client.backups.RestoreSpec.for_existing(delete_volumes=False) + code, data = api_client.backups.restore(unique_vm_name, spec) + assert 201 == code, f'Failed to restore backup with current VM replaced, {data}' + + # Check VM Started then get IPs (vm and host) + vm_got_ips, (code, data) = vm_checker.wait_ip_addresses(unique_vm_name, ['default']) + assert vm_got_ips, ( + f"Failed to Start VM({unique_vm_name}) with errors:\n" + f"Status: {data.get('status')}\n" + f"API Status({code}): {data}" + ) + vm_ip = next(iface['ipAddress'] for iface in data['status']['interfaces'] + if iface['name'] == 'default') + code, data = api_client.hosts.get(data['status']['nodeName']) + host_ip = next(addr['address'] for addr in data['status']['addresses'] + if addr['type'] == 'InternalIP') + + # Login to the new VM and check data is existing + with vm_shell_from_host(host_ip, vm_ip, base_vm_with_data['ssh_user'], pkey=pri_key) as sh: + cloud_inited, (out, err) = vm_checker.wait_cloudinit_done(sh) + assert cloud_inited, ( + f"VM {unique_vm_name} Started {wait_timeout} seconds" + f", but cloud-init still in {out}" + ) + out, err = sh.exec_command(f"cat {backup_data['path']}") + + assert backup_data['content'] in out, ( + f"cloud-init writefile failed\n" + f"Executed stdout: {out}\n" + f"Executed stderr: {err}" + ) + @pytest.mark.skip_version_if('< v1.2.2') @pytest.mark.dependency(depends=["TestBackupRestore::tests_backup_vm"], param=True) def test_restore_replace_with_vm_shutdown_command( From 6ab1ec69ee1f8b4339364ad8fe9d48f7a5a67ddb Mon Sep 17 00:00:00 2001 From: davidtclin Date: Sat, 13 Jul 2024 14:31:40 +0800 Subject: [PATCH 2/8] Create new class TestBackupRestoreWithSnapshot to place new test scripts --- .../integrations/test_4_vm_backup_restore.py | 127 ++++++++++++++---- 1 file changed, 101 insertions(+), 26 deletions(-) diff --git a/harvester_e2e_tests/integrations/test_4_vm_backup_restore.py b/harvester_e2e_tests/integrations/test_4_vm_backup_restore.py index 71c5b9f4b..3a4b08a21 100644 --- a/harvester_e2e_tests/integrations/test_4_vm_backup_restore.py +++ b/harvester_e2e_tests/integrations/test_4_vm_backup_restore.py @@ -560,32 +560,124 @@ def test_restore_with_invalid_name(self, api_client, backup_config, base_vm_with code, data = api_client.backups.restore(unique_vm_name, spec) assert 422 == code, (code, data) + 
@pytest.mark.skip_version_if('< v1.2.2') @pytest.mark.dependency(depends=["TestBackupRestore::tests_backup_vm"], param=True) - def test_with_snapshot_restore_with_new_vm( - self, api_client, vm_shell_from_host, vm_checker, ssh_keypair, wait_timeout, + def test_restore_replace_with_vm_shutdown_command( + self, api_client, vm_shell_from_host, ssh_keypair, wait_timeout, vm_checker, backup_config, base_vm_with_data ): + ''' ref: https://github.com/harvester/tests/issues/943 + 1. Create VM and write some data + 2. Take backup for the VM + 3. Mess up existing data + 3. Shutdown the VM by executing `shutdown` command in OS + 4. Restore backup to replace existing VM + 5. VM should be restored successfully + 6. Data in VM should be the same as backed up + ''' + unique_vm_name, backup_data = base_vm_with_data['name'], base_vm_with_data['data'] pub_key, pri_key = ssh_keypair - vm_backup_name = unique_vm_name + '-backup-new' + # mess up the existing data then shutdown it + with vm_shell_from_host( + base_vm_with_data['host_ip'], base_vm_with_data['vm_ip'], + base_vm_with_data['ssh_user'], pkey=pri_key + ) as sh: + out, err = sh.exec_command(f"echo {pub_key!r} > {base_vm_with_data['data']['path']}") + assert not err, (out, err) + sh.exec_command('sync') + sh.exec_command('sudo shutdown now') + + endtime = datetime.now() + timedelta(seconds=wait_timeout) + while endtime > datetime.now(): + code, data = api_client.vms.get(unique_vm_name) + if 200 == code and "Stopped" == data.get('status', {}).get('printableStatus'): + break + sleep(5) + else: + raise AssertionError( + f"Failed to shut down VM({unique_vm_name}) with errors:\n" + f"Status({code}): {data}" + ) + + # restore VM to existing + spec = api_client.backups.RestoreSpec.for_existing(delete_volumes=True) + code, data = api_client.backups.restore(unique_vm_name, spec) + assert 201 == code, f'Failed to restore backup with current VM replaced, {data}' + + # Check VM Started then get IPs (vm and host) + vm_got_ips, (code, data) = vm_checker.wait_ip_addresses(unique_vm_name, ['default']) + assert vm_got_ips, ( + f"Failed to Start VM({unique_vm_name}) with errors:\n" + f"Status: {data.get('status')}\n" + f"API Status({code}): {data}" + ) + vm_ip = next(iface['ipAddress'] for iface in data['status']['interfaces'] + if iface['name'] == 'default') + code, data = api_client.hosts.get(data['status']['nodeName']) + host_ip = next(addr['address'] for addr in data['status']['addresses'] + if addr['type'] == 'InternalIP') + + # Login to the new VM and check data is existing + with vm_shell_from_host(host_ip, vm_ip, base_vm_with_data['ssh_user'], pkey=pri_key) as sh: + cloud_inited, (out, err) = vm_checker.wait_cloudinit_done(sh) + assert cloud_inited, ( + f"VM {unique_vm_name} Started {wait_timeout} seconds" + f", but cloud-init still in {out}" + ) + out, err = sh.exec_command(f"cat {backup_data['path']}") + + assert backup_data['content'] in out, ( + f"cloud-init writefile failed\n" + f"Executed stdout: {out}\n" + f"Executed stderr: {err}" + ) + + +@pytest.mark.p0 +@pytest.mark.backup_target +@pytest.mark.parametrize( + "backup_config", [ + pytest.param("S3", marks=pytest.mark.S3), + pytest.param("NFS", marks=pytest.mark.NFS) + ], + indirect=True) +class TestBackupRestoreWithSnapshot: + + @pytest.mark.dependency() + def test_connection(self, api_client, backup_config, config_backup_target): + code, data = api_client.settings.backup_target_test_connection() + assert 200 == code, f'Failed to test backup target connection: {data}' + + 
@pytest.mark.dependency(depends=["TestBackupRestoreWithSnapshot::test_connection"], param=True) + def tests_backup_vm(self, api_client, wait_timeout, backup_config, base_vm_with_data): + unique_vm_name = base_vm_with_data['name'] # Create backup with the name as VM's name - code, data = api_client.vms.backup(unique_vm_name, vm_backup_name) + code, data = api_client.vms.backup(unique_vm_name, unique_vm_name) assert 204 == code, (code, data) # Check backup is ready endtime = datetime.now() + timedelta(seconds=wait_timeout) while endtime > datetime.now(): - code, backup = api_client.backups.get(vm_backup_name) + code, backup = api_client.backups.get(unique_vm_name) if 200 == code and backup.get('status', {}).get('readyToUse'): break sleep(3) else: raise AssertionError( - f'Timed-out waiting for the backup \'{vm_backup_name}\' to be ready.' + f'Timed-out waiting for the backup \'{unique_vm_name}\' to be ready.' ) - vm_snapshot_name = unique_vm_name + '-snapshot-new' + @pytest.mark.dependency(depends=["TestBackupRestoreWithSnapshot::tests_backup_vm"], param=True) + def test_with_snapshot_restore_with_new_vm( + self, api_client, vm_shell_from_host, vm_checker, ssh_keypair, wait_timeout, + backup_config, base_vm_with_data + ): + unique_vm_name, backup_data = base_vm_with_data['name'], base_vm_with_data['data'] + pub_key, pri_key = ssh_keypair + + vm_snapshot_name = unique_vm_name + '-snapshot' # take vm snapshot code, data = api_client.vm_snapshots.create(unique_vm_name, vm_snapshot_name) assert 201 == code @@ -672,7 +764,7 @@ def test_with_snapshot_restore_with_new_vm( vol_name = vol['volume']['persistentVolumeClaim']['claimName'] api_client.volumes.delete(vol_name) - @pytest.mark.dependency(depends=["TestBackupRestore::tests_backup_vm"], param=True) + @pytest.mark.dependency(depends=["TestBackupRestoreWithSnapshot::tests_backup_vm"], param=True) def test_with_snapshot_restore_replace_retain_vols( self, api_client, vm_shell_from_host, ssh_keypair, wait_timeout, vm_checker, backup_config, base_vm_with_data @@ -680,24 +772,7 @@ def test_with_snapshot_restore_replace_retain_vols( unique_vm_name, backup_data = base_vm_with_data['name'], base_vm_with_data['data'] pub_key, pri_key = ssh_keypair - vm_backup_name = unique_vm_name + '-backup-replace' - - # Create backup with the name as VM's name - code, data = api_client.vms.backup(unique_vm_name, vm_backup_name) - assert 204 == code, (code, data) - # Check backup is ready - endtime = datetime.now() + timedelta(seconds=wait_timeout) - while endtime > datetime.now(): - code, backup = api_client.backups.get(vm_backup_name) - if 200 == code and backup.get('status', {}).get('readyToUse'): - break - sleep(3) - else: - raise AssertionError( - f'Timed-out waiting for the backup \'{vm_backup_name}\' to be ready.' 
- ) - - vm_snapshot_name = unique_vm_name + '-snapshot-replace' + vm_snapshot_name = unique_vm_name + '-snapshot-retain' # take vm snapshot code, data = api_client.vm_snapshots.create(unique_vm_name, vm_snapshot_name) assert 201 == code From 11111ea2c29cf94d16005a2d46dd5775ab0a360d Mon Sep 17 00:00:00 2001 From: davidtclin Date: Sun, 21 Jul 2024 23:54:16 +0800 Subject: [PATCH 3/8] Set specific backup name and check retain volume exists --- .../integrations/test_4_vm_backup_restore.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/harvester_e2e_tests/integrations/test_4_vm_backup_restore.py b/harvester_e2e_tests/integrations/test_4_vm_backup_restore.py index 3a4b08a21..cd44e500c 100644 --- a/harvester_e2e_tests/integrations/test_4_vm_backup_restore.py +++ b/harvester_e2e_tests/integrations/test_4_vm_backup_restore.py @@ -653,20 +653,20 @@ def test_connection(self, api_client, backup_config, config_backup_target): @pytest.mark.dependency(depends=["TestBackupRestoreWithSnapshot::test_connection"], param=True) def tests_backup_vm(self, api_client, wait_timeout, backup_config, base_vm_with_data): unique_vm_name = base_vm_with_data['name'] - + unique_backup_name = f"{unique_vm_name}-backup-with-snapshot" # Create backup with the name as VM's name - code, data = api_client.vms.backup(unique_vm_name, unique_vm_name) + code, data = api_client.vms.backup(unique_vm_name, unique_backup_name) assert 204 == code, (code, data) # Check backup is ready endtime = datetime.now() + timedelta(seconds=wait_timeout) while endtime > datetime.now(): - code, backup = api_client.backups.get(unique_vm_name) + code, backup = api_client.backups.get(unique_backup_name) if 200 == code and backup.get('status', {}).get('readyToUse'): break sleep(3) else: raise AssertionError( - f'Timed-out waiting for the backup \'{unique_vm_name}\' to be ready.' + f'Timed-out waiting for the backup \'{unique_backup_name}\' to be ready.' 
) @pytest.mark.dependency(depends=["TestBackupRestoreWithSnapshot::tests_backup_vm"], param=True) @@ -678,6 +678,7 @@ def test_with_snapshot_restore_with_new_vm( pub_key, pri_key = ssh_keypair vm_snapshot_name = unique_vm_name + '-snapshot' + unique_backup_name = f"{unique_vm_name}-backup-with-snapshot" # take vm snapshot code, data = api_client.vm_snapshots.create(unique_vm_name, vm_snapshot_name) assert 201 == code @@ -705,9 +706,9 @@ def test_with_snapshot_restore_with_new_vm( sh.exec_command('sync') # Restore VM into new - restored_vm_name = f"{backup_config[0].lower()}-restore-{unique_vm_name}" + restored_vm_name = f"{backup_config[0].lower()}-restore-{unique_vm_name}-with-snapshot" spec = api_client.backups.RestoreSpec.for_new(restored_vm_name) - code, data = api_client.backups.restore(unique_vm_name, spec) + code, data = api_client.backups.restore(unique_backup_name, spec) assert 201 == code, (code, data) # Check VM Started then get IPs (vm and host) @@ -773,6 +774,7 @@ def test_with_snapshot_restore_replace_retain_vols( pub_key, pri_key = ssh_keypair vm_snapshot_name = unique_vm_name + '-snapshot-retain' + unique_backup_name = f"{unique_vm_name}-backup-with-snapshot" # take vm snapshot code, data = api_client.vm_snapshots.create(unique_vm_name, vm_snapshot_name) assert 201 == code @@ -807,7 +809,7 @@ def test_with_snapshot_restore_replace_retain_vols( ) spec = api_client.backups.RestoreSpec.for_existing(delete_volumes=False) - code, data = api_client.backups.restore(unique_vm_name, spec) + code, data = api_client.backups.restore(unique_backup_name, spec) assert 201 == code, f'Failed to restore backup with current VM replaced, {data}' # Check VM Started then get IPs (vm and host) From 98b69d5738094f0c0bbc86efbf6c2c14edc89bc1 Mon Sep 17 00:00:00 2001 From: davidtclin Date: Mon, 22 Jul 2024 15:26:12 +0800 Subject: [PATCH 4/8] Add todo action to remind to clean up the retain volume --- harvester_e2e_tests/integrations/test_4_vm_backup_restore.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/harvester_e2e_tests/integrations/test_4_vm_backup_restore.py b/harvester_e2e_tests/integrations/test_4_vm_backup_restore.py index cd44e500c..a7e4c3171 100644 --- a/harvester_e2e_tests/integrations/test_4_vm_backup_restore.py +++ b/harvester_e2e_tests/integrations/test_4_vm_backup_restore.py @@ -916,6 +916,9 @@ def test_restore_replace_with_vm_shutdown_command( f"Executed stderr: {err}" ) + # TODO: try to find the suitable solution to delete the retain volume + # without making the deleting VM stuck in Terminating + @pytest.mark.skip("https://github.com/harvester/harvester/issues/1473") @pytest.mark.p0 From 001236b977d63e79b30fdb8c2617546128b688b7 Mon Sep 17 00:00:00 2001 From: davidtclin Date: Fri, 9 Aug 2024 15:35:47 +0800 Subject: [PATCH 5/8] Rebase with latest main changes --- harvester_e2e_tests/integrations/test_4_vm_backup_restore.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/harvester_e2e_tests/integrations/test_4_vm_backup_restore.py b/harvester_e2e_tests/integrations/test_4_vm_backup_restore.py index a7e4c3171..fd06561ea 100644 --- a/harvester_e2e_tests/integrations/test_4_vm_backup_restore.py +++ b/harvester_e2e_tests/integrations/test_4_vm_backup_restore.py @@ -711,6 +711,9 @@ def test_with_snapshot_restore_with_new_vm( code, data = api_client.backups.restore(unique_backup_name, spec) assert 201 == code, (code, data) + vm_getable, (code, data) = vm_checker.wait_getable(restored_vm_name) + assert vm_getable, (code, data) + # Check VM Started then get IPs (vm and host) 
vm_got_ips, (code, data) = vm_checker.wait_ip_addresses(restored_vm_name, ['default']) assert vm_got_ips, ( @@ -811,6 +814,8 @@ def test_with_snapshot_restore_replace_retain_vols( spec = api_client.backups.RestoreSpec.for_existing(delete_volumes=False) code, data = api_client.backups.restore(unique_backup_name, spec) assert 201 == code, f'Failed to restore backup with current VM replaced, {data}' + vm_getable, (code, data) = vm_checker.wait_getable(unique_vm_name) + assert vm_getable, (code, data) # Check VM Started then get IPs (vm and host) vm_got_ips, (code, data) = vm_checker.wait_ip_addresses(unique_vm_name, ['default']) From 5577c19d5f59c7320ad438ce71daff0277592a64 Mon Sep 17 00:00:00 2001 From: davidtclin Date: Mon, 2 Sep 2024 15:07:20 +0800 Subject: [PATCH 6/8] Move the snapshot restore testcases to the TestBackupRestore class --- .../integrations/test_4_vm_backup_restore.py | 154 +++++------------- 1 file changed, 38 insertions(+), 116 deletions(-) diff --git a/harvester_e2e_tests/integrations/test_4_vm_backup_restore.py b/harvester_e2e_tests/integrations/test_4_vm_backup_restore.py index fd06561ea..d53a5e373 100644 --- a/harvester_e2e_tests/integrations/test_4_vm_backup_restore.py +++ b/harvester_e2e_tests/integrations/test_4_vm_backup_restore.py @@ -634,42 +634,7 @@ def test_restore_replace_with_vm_shutdown_command( f"Executed stderr: {err}" ) - -@pytest.mark.p0 -@pytest.mark.backup_target -@pytest.mark.parametrize( - "backup_config", [ - pytest.param("S3", marks=pytest.mark.S3), - pytest.param("NFS", marks=pytest.mark.NFS) - ], - indirect=True) -class TestBackupRestoreWithSnapshot: - - @pytest.mark.dependency() - def test_connection(self, api_client, backup_config, config_backup_target): - code, data = api_client.settings.backup_target_test_connection() - assert 200 == code, f'Failed to test backup target connection: {data}' - - @pytest.mark.dependency(depends=["TestBackupRestoreWithSnapshot::test_connection"], param=True) - def tests_backup_vm(self, api_client, wait_timeout, backup_config, base_vm_with_data): - unique_vm_name = base_vm_with_data['name'] - unique_backup_name = f"{unique_vm_name}-backup-with-snapshot" - # Create backup with the name as VM's name - code, data = api_client.vms.backup(unique_vm_name, unique_backup_name) - assert 204 == code, (code, data) - # Check backup is ready - endtime = datetime.now() + timedelta(seconds=wait_timeout) - while endtime > datetime.now(): - code, backup = api_client.backups.get(unique_backup_name) - if 200 == code and backup.get('status', {}).get('readyToUse'): - break - sleep(3) - else: - raise AssertionError( - f'Timed-out waiting for the backup \'{unique_backup_name}\' to be ready.' 
- ) - - @pytest.mark.dependency(depends=["TestBackupRestoreWithSnapshot::tests_backup_vm"], param=True) + @pytest.mark.dependency(depends=["TestBackupRestore::tests_backup_vm"], param=True) def test_with_snapshot_restore_with_new_vm( self, api_client, vm_shell_from_host, vm_checker, ssh_keypair, wait_timeout, backup_config, base_vm_with_data @@ -678,7 +643,7 @@ def test_with_snapshot_restore_with_new_vm( pub_key, pri_key = ssh_keypair vm_snapshot_name = unique_vm_name + '-snapshot' - unique_backup_name = f"{unique_vm_name}-backup-with-snapshot" + # unique_backup_name = f"{unique_vm_name}-backup-with-snapshot" # take vm snapshot code, data = api_client.vm_snapshots.create(unique_vm_name, vm_snapshot_name) assert 201 == code @@ -696,6 +661,26 @@ def test_with_snapshot_restore_with_new_vm( assert 200 == code assert data.get("status", {}).get("readyToUse") is True + vm_running, (code, data) = vm_checker.wait_status_running(unique_vm_name) + assert vm_running, ( + f"Failed to restore VM({unique_vm_name}) with errors:\n" + f"Status({code}): {data}" + ) + + # Check VM Started then get IPs (vm and host) + vm_got_ips, (code, data) = vm_checker.wait_ip_addresses(unique_vm_name, ['default']) + assert vm_got_ips, ( + f"Failed to Start VM({unique_vm_name}) with errors:\n" + f"Status: {data.get('status')}\n" + f"API Status({code}): {data}" + ) + vm_ip = next(iface['ipAddress'] for iface in data['status']['interfaces'] + if iface['name'] == 'default') + code, data = api_client.hosts.get(data['status']['nodeName']) + host_ip = next(addr['address'] for addr in data['status']['addresses'] + if addr['type'] == 'InternalIP') + base_vm_with_data['host_ip'], base_vm_with_data['vm_ip'] = host_ip, vm_ip + # mess up the existing data with vm_shell_from_host( base_vm_with_data['host_ip'], base_vm_with_data['vm_ip'], @@ -708,12 +693,9 @@ def test_with_snapshot_restore_with_new_vm( # Restore VM into new restored_vm_name = f"{backup_config[0].lower()}-restore-{unique_vm_name}-with-snapshot" spec = api_client.backups.RestoreSpec.for_new(restored_vm_name) - code, data = api_client.backups.restore(unique_backup_name, spec) + code, data = api_client.backups.restore(unique_vm_name, spec) assert 201 == code, (code, data) - vm_getable, (code, data) = vm_checker.wait_getable(restored_vm_name) - assert vm_getable, (code, data) - # Check VM Started then get IPs (vm and host) vm_got_ips, (code, data) = vm_checker.wait_ip_addresses(restored_vm_name, ['default']) assert vm_got_ips, ( @@ -768,7 +750,7 @@ def test_with_snapshot_restore_with_new_vm( vol_name = vol['volume']['persistentVolumeClaim']['claimName'] api_client.volumes.delete(vol_name) - @pytest.mark.dependency(depends=["TestBackupRestoreWithSnapshot::tests_backup_vm"], param=True) + @pytest.mark.dependency(depends=["TestBackupRestore::tests_backup_vm"], param=True) def test_with_snapshot_restore_replace_retain_vols( self, api_client, vm_shell_from_host, ssh_keypair, wait_timeout, vm_checker, backup_config, base_vm_with_data @@ -777,7 +759,7 @@ def test_with_snapshot_restore_replace_retain_vols( pub_key, pri_key = ssh_keypair vm_snapshot_name = unique_vm_name + '-snapshot-retain' - unique_backup_name = f"{unique_vm_name}-backup-with-snapshot" + # unique_backup_name = f"{unique_vm_name}-backup-with-snapshot" # take vm snapshot code, data = api_client.vm_snapshots.create(unique_vm_name, vm_snapshot_name) assert 201 == code @@ -795,28 +777,12 @@ def test_with_snapshot_restore_replace_retain_vols( assert 200 == code assert data.get("status", {}).get("readyToUse") is True - # 
mess up the existing data - with vm_shell_from_host( - base_vm_with_data['host_ip'], base_vm_with_data['vm_ip'], - base_vm_with_data['ssh_user'], pkey=pri_key - ) as sh: - out, err = sh.exec_command(f"echo {pub_key!r} > {base_vm_with_data['data']['path']}") - assert not err, (out, err) - sh.exec_command('sync') - - # Stop the VM then restore existing - vm_stopped, (code, data) = vm_checker.wait_stopped(unique_vm_name) - assert vm_stopped, ( - f"Failed to Stop VM({unique_vm_name}) with errors:\n" + vm_running, (code, data) = vm_checker.wait_status_running(unique_vm_name) + assert vm_running, ( + f"Failed to restore VM({unique_vm_name}) with errors:\n" f"Status({code}): {data}" ) - spec = api_client.backups.RestoreSpec.for_existing(delete_volumes=False) - code, data = api_client.backups.restore(unique_backup_name, spec) - assert 201 == code, f'Failed to restore backup with current VM replaced, {data}' - vm_getable, (code, data) = vm_checker.wait_getable(unique_vm_name) - assert vm_getable, (code, data) - # Check VM Started then get IPs (vm and host) vm_got_ips, (code, data) = vm_checker.wait_ip_addresses(unique_vm_name, ['default']) assert vm_got_ips, ( @@ -829,42 +795,9 @@ def test_with_snapshot_restore_replace_retain_vols( code, data = api_client.hosts.get(data['status']['nodeName']) host_ip = next(addr['address'] for addr in data['status']['addresses'] if addr['type'] == 'InternalIP') + base_vm_with_data['host_ip'], base_vm_with_data['vm_ip'] = host_ip, vm_ip - # Login to the new VM and check data is existing - with vm_shell_from_host(host_ip, vm_ip, base_vm_with_data['ssh_user'], pkey=pri_key) as sh: - cloud_inited, (out, err) = vm_checker.wait_cloudinit_done(sh) - assert cloud_inited, ( - f"VM {unique_vm_name} Started {wait_timeout} seconds" - f", but cloud-init still in {out}" - ) - out, err = sh.exec_command(f"cat {backup_data['path']}") - - assert backup_data['content'] in out, ( - f"cloud-init writefile failed\n" - f"Executed stdout: {out}\n" - f"Executed stderr: {err}" - ) - - @pytest.mark.skip_version_if('< v1.2.2') - @pytest.mark.dependency(depends=["TestBackupRestore::tests_backup_vm"], param=True) - def test_restore_replace_with_vm_shutdown_command( - self, api_client, vm_shell_from_host, ssh_keypair, wait_timeout, vm_checker, - backup_config, base_vm_with_data - ): - ''' ref: https://github.com/harvester/tests/issues/943 - 1. Create VM and write some data - 2. Take backup for the VM - 3. Mess up existing data - 3. Shutdown the VM by executing `shutdown` command in OS - 4. Restore backup to replace existing VM - 5. VM should be restored successfully - 6. 
Data in VM should be the same as backed up - ''' - - unique_vm_name, backup_data = base_vm_with_data['name'], base_vm_with_data['data'] - pub_key, pri_key = ssh_keypair - - # mess up the existing data then shutdown it + # mess up the existing data with vm_shell_from_host( base_vm_with_data['host_ip'], base_vm_with_data['vm_ip'], base_vm_with_data['ssh_user'], pkey=pri_key @@ -872,26 +805,17 @@ def test_restore_replace_with_vm_shutdown_command( out, err = sh.exec_command(f"echo {pub_key!r} > {base_vm_with_data['data']['path']}") assert not err, (out, err) sh.exec_command('sync') - sh.exec_command('sudo shutdown now') - endtime = datetime.now() + timedelta(seconds=wait_timeout) - while endtime > datetime.now(): - code, data = api_client.vms.get(unique_vm_name) - if 200 == code and "Stopped" == data.get('status', {}).get('printableStatus'): - break - sleep(5) - else: - raise AssertionError( - f"Failed to shut down VM({unique_vm_name}) with errors:\n" - f"Status({code}): {data}" - ) + # Stop the VM then restore existing + vm_stopped, (code, data) = vm_checker.wait_stopped(unique_vm_name) + assert vm_stopped, ( + f"Failed to Stop VM({unique_vm_name}) with errors:\n" + f"Status({code}): {data}" + ) - # restore VM to existing - spec = api_client.backups.RestoreSpec.for_existing(delete_volumes=True) + spec = api_client.backups.RestoreSpec.for_existing(delete_volumes=False) code, data = api_client.backups.restore(unique_vm_name, spec) assert 201 == code, f'Failed to restore backup with current VM replaced, {data}' - vm_getable, (code, data) = vm_checker.wait_getable(unique_vm_name) - assert vm_getable, (code, data) # Check VM Started then get IPs (vm and host) vm_got_ips, (code, data) = vm_checker.wait_ip_addresses(unique_vm_name, ['default']) @@ -905,6 +829,7 @@ def test_restore_replace_with_vm_shutdown_command( code, data = api_client.hosts.get(data['status']['nodeName']) host_ip = next(addr['address'] for addr in data['status']['addresses'] if addr['type'] == 'InternalIP') + base_vm_with_data['host_ip'], base_vm_with_data['vm_ip'] = host_ip, vm_ip # Login to the new VM and check data is existing with vm_shell_from_host(host_ip, vm_ip, base_vm_with_data['ssh_user'], pkey=pri_key) as sh: @@ -921,9 +846,6 @@ def test_restore_replace_with_vm_shutdown_command( f"Executed stderr: {err}" ) - # TODO: try to find the suitable solution to delete the retain volume - # without making the deleting VM stuck in Terminating - @pytest.mark.skip("https://github.com/harvester/harvester/issues/1473") @pytest.mark.p0 From 206612a93b978c893a2dffb721b64201a61e3594 Mon Sep 17 00:00:00 2001 From: davidtclin Date: Fri, 6 Sep 2024 00:44:04 +0800 Subject: [PATCH 7/8] Fix Jenkins job execution failure --- .../integrations/test_4_vm_backup_restore.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/harvester_e2e_tests/integrations/test_4_vm_backup_restore.py b/harvester_e2e_tests/integrations/test_4_vm_backup_restore.py index d53a5e373..f7824e50a 100644 --- a/harvester_e2e_tests/integrations/test_4_vm_backup_restore.py +++ b/harvester_e2e_tests/integrations/test_4_vm_backup_restore.py @@ -643,7 +643,7 @@ def test_with_snapshot_restore_with_new_vm( pub_key, pri_key = ssh_keypair vm_snapshot_name = unique_vm_name + '-snapshot' - # unique_backup_name = f"{unique_vm_name}-backup-with-snapshot" + # take vm snapshot code, data = api_client.vm_snapshots.create(unique_vm_name, vm_snapshot_name) assert 201 == code @@ -695,6 +695,8 @@ def test_with_snapshot_restore_with_new_vm( spec = 
api_client.backups.RestoreSpec.for_new(restored_vm_name) code, data = api_client.backups.restore(unique_vm_name, spec) assert 201 == code, (code, data) + vm_getable, (code, data) = vm_checker.wait_getable(restored_vm_name) + assert vm_getable, (code, data) # Check VM Started then get IPs (vm and host) vm_got_ips, (code, data) = vm_checker.wait_ip_addresses(restored_vm_name, ['default']) @@ -708,6 +710,7 @@ def test_with_snapshot_restore_with_new_vm( code, data = api_client.hosts.get(data['status']['nodeName']) host_ip = next(addr['address'] for addr in data['status']['addresses'] if addr['type'] == 'InternalIP') + base_vm_with_data['host_ip'], base_vm_with_data['vm_ip'] = host_ip, vm_ip # Login to the new VM and check data is existing with vm_shell_from_host(host_ip, vm_ip, base_vm_with_data['ssh_user'], pkey=pri_key) as sh: @@ -759,7 +762,7 @@ def test_with_snapshot_restore_replace_retain_vols( pub_key, pri_key = ssh_keypair vm_snapshot_name = unique_vm_name + '-snapshot-retain' - # unique_backup_name = f"{unique_vm_name}-backup-with-snapshot" + # take vm snapshot code, data = api_client.vm_snapshots.create(unique_vm_name, vm_snapshot_name) assert 201 == code @@ -817,6 +820,12 @@ def test_with_snapshot_restore_replace_retain_vols( code, data = api_client.backups.restore(unique_vm_name, spec) assert 201 == code, f'Failed to restore backup with current VM replaced, {data}' + vm_running, (code, data) = vm_checker.wait_status_running(unique_vm_name) + assert vm_running, ( + f"Failed to restore VM({unique_vm_name}) with errors:\n" + f"Status({code}): {data}" + ) + # Check VM Started then get IPs (vm and host) vm_got_ips, (code, data) = vm_checker.wait_ip_addresses(unique_vm_name, ['default']) assert vm_got_ips, ( From 534f32935a23979a532fa361ce97761c16c92abd Mon Sep 17 00:00:00 2001 From: davidtclin Date: Thu, 12 Sep 2024 23:57:54 +0800 Subject: [PATCH 8/8] Update with review request part1 --- .../integrations/test_4_vm_backup_restore.py | 28 +++++++++++-------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/harvester_e2e_tests/integrations/test_4_vm_backup_restore.py b/harvester_e2e_tests/integrations/test_4_vm_backup_restore.py index f7824e50a..f65c1311b 100644 --- a/harvester_e2e_tests/integrations/test_4_vm_backup_restore.py +++ b/harvester_e2e_tests/integrations/test_4_vm_backup_restore.py @@ -605,6 +605,8 @@ def test_restore_replace_with_vm_shutdown_command( spec = api_client.backups.RestoreSpec.for_existing(delete_volumes=True) code, data = api_client.backups.restore(unique_vm_name, spec) assert 201 == code, f'Failed to restore backup with current VM replaced, {data}' + vm_getable, (code, data) = vm_checker.wait_getable(unique_vm_name) + assert vm_getable, (code, data) # Check VM Started then get IPs (vm and host) vm_got_ips, (code, data) = vm_checker.wait_ip_addresses(unique_vm_name, ['default']) @@ -652,14 +654,15 @@ def test_with_snapshot_restore_with_new_vm( while deadline > datetime.now(): code, data = api_client.vm_snapshots.get(vm_snapshot_name) if data.get("status", {}).get("readyToUse"): + assert 200 == code break print(f"waiting for {vm_snapshot_name} to be ready") sleep(3) else: - raise AssertionError(f"timed out waiting for {vm_snapshot_name} to be ready") - - assert 200 == code - assert data.get("status", {}).get("readyToUse") is True + raise AssertionError( + f"timed out waiting for {vm_snapshot_name} to be ready:\n" + f"Status({code}): {data}" + ) vm_running, (code, data) = vm_checker.wait_status_running(unique_vm_name) assert vm_running, ( @@ -667,10 
+670,10 @@ def test_with_snapshot_restore_with_new_vm(
             f"Status({code}): {data}"
         )
 
-        # Check VM Started then get IPs (vm and host)
+        # Check VM is still running and can get IPs (vm and host)
         vm_got_ips, (code, data) = vm_checker.wait_ip_addresses(unique_vm_name, ['default'])
         assert vm_got_ips, (
-            f"Failed to Start VM({unique_vm_name}) with errors:\n"
+            f"Failed to check the VM({unique_vm_name}) is still in running state:\n"
             f"Status: {data.get('status')}\n"
             f"API Status({code}): {data}"
         )
@@ -771,14 +774,15 @@ def test_with_snapshot_restore_replace_retain_vols(
         while deadline > datetime.now():
             code, data = api_client.vm_snapshots.get(vm_snapshot_name)
             if data.get("status", {}).get("readyToUse"):
+                assert 200 == code
                 break
             print(f"waiting for {vm_snapshot_name} to be ready")
             sleep(3)
         else:
-            raise AssertionError(f"timed out waiting for {vm_snapshot_name} to be ready")
-
-        assert 200 == code
-        assert data.get("status", {}).get("readyToUse") is True
+            raise AssertionError(
+                f"timed out waiting for {vm_snapshot_name} to be ready:\n"
+                f"Status({code}): {data}"
+            )
 
         vm_running, (code, data) = vm_checker.wait_status_running(unique_vm_name)
         assert vm_running, (
@@ -786,10 +790,10 @@ def test_with_snapshot_restore_replace_retain_vols(
             f"Status({code}): {data}"
         )
 
-        # Check VM Started then get IPs (vm and host)
+        # Check VM is still running and can get IPs (vm and host)
         vm_got_ips, (code, data) = vm_checker.wait_ip_addresses(unique_vm_name, ['default'])
         assert vm_got_ips, (
-            f"Failed to Start VM({unique_vm_name}) with errors:\n"
+            f"Failed to check the VM({unique_vm_name}) is still in running state:\n"
             f"Status: {data.get('status')}\n"
            f"API Status({code}): {data}"
        )
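
Editor's note (not part of the patch series): the snapshot-readiness polling that these patches inline in both test_with_snapshot_restore_with_new_vm and test_with_snapshot_restore_replace_retain_vols could be shared as one helper. Below is a minimal sketch, assuming only the api_client fixture and the api_client.vm_snapshots.get() call already used in the diffs; the helper name, signature, and interval argument are illustrative, not part of the actual change.

    # Hypothetical helper consolidating the readyToUse polling loop shown in the patches.
    from datetime import datetime, timedelta
    from time import sleep

    def wait_snapshot_ready(api_client, vm_snapshot_name, wait_timeout, interval=3):
        """Poll a VM snapshot until status.readyToUse is True, or raise on timeout."""
        code, data = None, None
        deadline = datetime.now() + timedelta(seconds=wait_timeout)
        while deadline > datetime.now():
            code, data = api_client.vm_snapshots.get(vm_snapshot_name)
            if 200 == code and data.get("status", {}).get("readyToUse"):
                return data
            sleep(interval)
        raise AssertionError(
            f"timed out waiting for {vm_snapshot_name} to be ready:\n"
            f"Status({code}): {data}"
        )

Either test would then reduce the loop to a single call, e.g. data = wait_snapshot_ready(api_client, vm_snapshot_name, wait_timeout).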