From 650f3a5d511690ec27648b30f3b24532378a33a1 Mon Sep 17 00:00:00 2001 From: Rodrigo Barbieri Date: Fri, 6 Oct 2023 10:24:48 -0300 Subject: [PATCH] [v2] Fix migration across nova-compute apps using ceph This change reworks previous changes [1] and [2] that had been respectively reverted and abandoned. When using the config libvirt-image-backend=rbd, VMs created from image have their disk data stored in ceph instead of the compute node itself. When performing live-migrations, both nodes need to access the same ceph credentials to access the VM's disk in ceph, but this is currently not possible if the nodes involved pertain to different nova-compute charm apps. This patch changes app name sent to ceph to 'nova-compute-ceph-auth-c91ce26f', a unique name common to all nova-compute apps, allowing all nova-compute apps to use the same ceph auth. This change also ensures newly deployed nodes install the old credentials first on ceph-joined hook, and then supercedes it with the new credentials on ceph-changed hook, therefore also retaining the old credentials. This patch also includes the charmhelpers sync from PR: #840 [1] https://review.opendev.org/889642 [2] https://review.opendev.org/896155 Closes-bug: #2028559 Related-bug: #2037003 Func-Test-Pr: https://github.com/openstack-charmers/zaza-openstack-tests/pull/1149 Change-Id: I1ae12d787a1f8e7761ca06b5a80049c1c62e9e90 --- .../contrib/storage/linux/ceph.py | 8 +- hooks/nova_compute_context.py | 32 ++++++-- hooks/nova_compute_hooks.py | 39 +++++++-- unit_tests/test_nova_compute_contexts.py | 57 +++++++++++++ unit_tests/test_nova_compute_hooks.py | 82 +++++++++++++++++-- 5 files changed, 199 insertions(+), 19 deletions(-) diff --git a/hooks/charmhelpers/contrib/storage/linux/ceph.py b/hooks/charmhelpers/contrib/storage/linux/ceph.py index 2e1fc1b5..6ec67cba 100644 --- a/hooks/charmhelpers/contrib/storage/linux/ceph.py +++ b/hooks/charmhelpers/contrib/storage/linux/ceph.py @@ -158,15 +158,19 @@ def get_osd_settings(relation_name): return _order_dict_by_key(osd_settings) -def send_application_name(relid=None): +def send_application_name(relid=None, app_name=None): """Send the application name down the relation. :param relid: Relation id to set application name in. :type relid: str + :param app_name: Application name to send in the relation. + :type app_name: str """ + if app_name is None: + app_name = application_name() relation_set( relation_id=relid, - relation_settings={'application-name': application_name()}) + relation_settings={'application-name': app_name}) def send_osd_settings(): diff --git a/hooks/nova_compute_context.py b/hooks/nova_compute_context.py index c6ab5f82..89e1780f 100644 --- a/hooks/nova_compute_context.py +++ b/hooks/nova_compute_context.py @@ -38,6 +38,7 @@ from charmhelpers.fetch import apt_install, filter_installed_packages from charmhelpers.core.hookenv import ( config, + local_unit, log, relation_get, relation_ids, @@ -62,8 +63,14 @@ ) # This is just a label and it must be consistent across -# nova-compute nodes to support live migration. -CEPH_SECRET_UUID = '514c9fca-8cbe-11e2-9c52-3bc8c7819472' +# nova-compute nodes to support live migration. It needs to +# change whenever CEPH_SECRET_UUID also changes. +CEPH_AUTH_CRED_NAME = 'nova-compute-ceph-auth-c91ce26f' +CEPH_SECRET_UUID = 'c91ce26f-403d-4058-9c38-6b56e1c428e0' +# We keep the old secret to retain old credentials and support +# live-migrations between existing instances to newly deployed +# nodes. For more info see LP#2037003. +CEPH_OLD_SECRET_UUID = '514c9fca-8cbe-11e2-9c52-3bc8c7819472' OVS_BRIDGE = 'br-int' @@ -115,6 +122,16 @@ def get_availability_zone(): else config('default-availability-zone')) +def sent_ceph_application_name(): + app_name = None + for rid in relation_ids('ceph'): + app_name = relation_get( + 'application-name', rid=rid, unit=local_unit()) + # default to the old name, so it goes through the old path during + # ceph relation set up + return app_name or service_name() + + def _neutron_security_groups(): ''' Inspects current cloud-compute relation and determine if nova-c-c has @@ -435,13 +452,18 @@ def __call__(self): ctxt = super(NovaComputeCephContext, self).__call__() if not ctxt: return {} - svc = service_name() + svc = sent_ceph_application_name() + if svc == CEPH_AUTH_CRED_NAME: + secret_uuid = CEPH_SECRET_UUID + else: + secret_uuid = CEPH_OLD_SECRET_UUID + # secret.xml - ctxt['ceph_secret_uuid'] = CEPH_SECRET_UUID + ctxt['ceph_secret_uuid'] = secret_uuid # nova.conf ctxt['service_name'] = svc ctxt['rbd_user'] = svc - ctxt['rbd_secret_uuid'] = CEPH_SECRET_UUID + ctxt['rbd_secret_uuid'] = secret_uuid if config('pool-type') == 'erasure-coded': ctxt['rbd_pool'] = ( diff --git a/hooks/nova_compute_hooks.py b/hooks/nova_compute_hooks.py index 0aa1470f..8ab61556 100755 --- a/hooks/nova_compute_hooks.py +++ b/hooks/nova_compute_hooks.py @@ -35,6 +35,7 @@ log, DEBUG, INFO, + WARNING, ERROR, relation_ids, remote_service_name, @@ -106,7 +107,8 @@ services, register_configs, NOVA_CONF, - ceph_config_file, CEPH_SECRET, + ceph_config_file, + CEPH_SECRET, CEPH_BACKEND_SECRET, enable_shell, disable_shell, configure_lxd, @@ -136,6 +138,9 @@ from nova_compute_context import ( nova_metadata_requirement, + sent_ceph_application_name, + CEPH_OLD_SECRET_UUID, + CEPH_AUTH_CRED_NAME, CEPH_SECRET_UUID, assert_libvirt_rbd_imagebackend_allowed, get_availability_zone, @@ -454,6 +459,7 @@ def ceph_joined(): # Bug 1427660 if not is_unit_paused_set() and config('virt-type') in LIBVIRT_TYPES: service_restart(libvirt_daemon()) + # install old credentials first for backwards compatibility send_application_name() @@ -566,9 +572,16 @@ def ceph_changed(rid=None, unit=None): log('ceph relation incomplete. Peer not ready?') return - if not ensure_ceph_keyring(service=service_name(), user='nova', + sent_app_name = sent_ceph_application_name() + if sent_app_name == CEPH_AUTH_CRED_NAME: + secret_uuid = CEPH_SECRET_UUID + else: + secret_uuid = CEPH_OLD_SECRET_UUID + + if not ensure_ceph_keyring(service=sent_app_name, user='nova', group='nova'): - log('Could not create ceph keyring: peer not ready?') + log('Could not create ceph keyring ' + 'for {}: peer not ready?'.format(sent_app_name)) return CONFIGS.write(ceph_config_file()) @@ -580,7 +593,12 @@ def ceph_changed(rid=None, unit=None): key = relation_get(attribute='key', rid=rid, unit=unit) if config('virt-type') in ['kvm', 'qemu', 'lxc'] and key: create_libvirt_secret(secret_file=CEPH_SECRET, - secret_uuid=CEPH_SECRET_UUID, key=key) + secret_uuid=secret_uuid, key=key) + + if sent_app_name != CEPH_AUTH_CRED_NAME: + log('Sending application name for new ceph credentials after ' + 'setting up the old credentials') + send_application_name(relid=rid, app_name=CEPH_AUTH_CRED_NAME) try: _handle_ceph_request() @@ -591,7 +609,7 @@ def ceph_changed(rid=None, unit=None): # the hook execution. log('Caught ValueError, invalid value provided for ' 'configuration?: "{}"'.format(str(e)), - level=DEBUG) + level=WARNING) # TODO: Refactor this method moving part of this logic to charmhelpers, @@ -691,8 +709,11 @@ def _get_broker_rid_unit_for_previous_request(): @hooks.hook('ceph-relation-broken') def ceph_broken(): - service = service_name() - delete_keyring(service=service) + delete_keyring(service=CEPH_AUTH_CRED_NAME) + # cleanup old entries based on application name + svc_name = service_name() + if svc_name != CEPH_AUTH_CRED_NAME: + delete_keyring(service=svc_name) update_all_configs() @@ -718,6 +739,10 @@ def upgrade_charm(): for r_id in relation_ids('amqp'): amqp_joined(relation_id=r_id) + # Trigger upgrade-path from application_name to 'nova-compute' + if sent_ceph_application_name() != CEPH_AUTH_CRED_NAME: + for r_id in relation_ids('ceph'): + send_application_name(relid=r_id, app_name=CEPH_AUTH_CRED_NAME) if is_relation_made('nrpe-external-master'): update_nrpe_config() diff --git a/unit_tests/test_nova_compute_contexts.py b/unit_tests/test_nova_compute_contexts.py index f49d91f5..3f6558b6 100644 --- a/unit_tests/test_nova_compute_contexts.py +++ b/unit_tests/test_nova_compute_contexts.py @@ -26,6 +26,7 @@ 'relation_ids', 'relation_get', 'related_units', + 'service_name', 'config', 'log', '_save_flag_file', @@ -1024,6 +1025,25 @@ def test_nova_metadata_requirement(self): self.assertEqual(context.nova_metadata_requirement(), (True, None)) + @patch('nova_compute_context.local_unit') + def test_sent_ceph_application_name_empty_rel_data_default( + self, local_unit): + local_unit.return_value = 'nova-compute-kvm/0' + self.relation_ids.return_value = ['ceph:0'] + self.test_relation.set({}) + self.service_name.return_value = 'nova-compute-kvm' + self.assertEqual( + context.sent_ceph_application_name(), 'nova-compute-kvm') + + @patch('nova_compute_context.local_unit') + def test_sent_ceph_application_name(self, local_unit): + local_unit.return_value = 'nova-compute-kvm/0' + self.relation_ids.return_value = ['ceph:0'] + self.test_relation.set({'application-name': 'ceph-nova-compute'}) + self.service_name.return_value = 'nova-compute-kvm' + self.assertEqual( + context.sent_ceph_application_name(), 'ceph-nova-compute') + def test_nova_compute_extra_flags(self): self.test_config.set('cpu-model-extra-flags', 'pcid vmx pdpe1gb') self.lsb_release.return_value = {'DISTRIB_CODENAME': 'bionic'} @@ -1387,6 +1407,43 @@ def setUp(self): self.config.side_effect = self.test_config.get self.os_release.return_value = 'queens' + @patch('nova_compute_context.sent_ceph_application_name') + @patch('charmhelpers.contrib.openstack.context.CephContext.__call__') + def test_secret_uuid_old(self, mock_call, sent_name): + sent_name.return_value = 'nova-compute-kvm' + mock_call.return_value = {'foo': 'bar'} + ctxt = context.NovaComputeCephContext()() + self.assertEqual( + context.CEPH_OLD_SECRET_UUID, ctxt['ceph_secret_uuid']) + self.assertEqual( + context.CEPH_OLD_SECRET_UUID, ctxt['rbd_secret_uuid']) + self.assertEqual('nova-compute-kvm', ctxt['service_name']) + self.assertEqual('nova-compute-kvm', ctxt['rbd_user']) + + @patch('nova_compute_context.sent_ceph_application_name') + @patch('charmhelpers.contrib.openstack.context.CephContext.__call__') + def test_secret_uuid_old_2(self, mock_call, sent_name): + sent_name.return_value = 'nova-compute' + mock_call.return_value = {'foo': 'bar'} + ctxt = context.NovaComputeCephContext()() + self.assertEqual( + context.CEPH_OLD_SECRET_UUID, ctxt['ceph_secret_uuid']) + self.assertEqual( + context.CEPH_OLD_SECRET_UUID, ctxt['rbd_secret_uuid']) + self.assertEqual('nova-compute', ctxt['service_name']) + self.assertEqual('nova-compute', ctxt['rbd_user']) + + @patch('nova_compute_context.sent_ceph_application_name') + @patch('charmhelpers.contrib.openstack.context.CephContext.__call__') + def test_secret_uuid_new(self, mock_call, sent_name): + sent_name.return_value = context.CEPH_AUTH_CRED_NAME + mock_call.return_value = {'foo': 'bar'} + ctxt = context.NovaComputeCephContext()() + self.assertEqual(context.CEPH_SECRET_UUID, ctxt['ceph_secret_uuid']) + self.assertEqual(context.CEPH_SECRET_UUID, ctxt['rbd_secret_uuid']) + self.assertEqual(context.CEPH_AUTH_CRED_NAME, ctxt['service_name']) + self.assertEqual(context.CEPH_AUTH_CRED_NAME, ctxt['rbd_user']) + @patch.dict(context.os.environ, {'JUJU_UNIT_NAME': 'nova_compute'}, clear=True) @patch('charmhelpers.contrib.openstack.context.CephContext.__call__') diff --git a/unit_tests/test_nova_compute_hooks.py b/unit_tests/test_nova_compute_hooks.py index c4b56cb1..074946c1 100644 --- a/unit_tests/test_nova_compute_hooks.py +++ b/unit_tests/test_nova_compute_hooks.py @@ -69,6 +69,7 @@ 'get_relation_ip', # nova_compute_context 'nova_metadata_requirement', + 'sent_ceph_application_name', # nova_compute_utils # 'PACKAGES', 'create_libvirt_secret', @@ -618,10 +619,13 @@ def test_ceph_changed_no_keyring(self, configs): configs.complete_contexts = MagicMock() configs.complete_contexts.return_value = ['ceph'] self.ensure_ceph_keyring.return_value = False + self.sent_ceph_application_name.return_value = 'nova-compute' hooks.ceph_changed() self.log.assert_called_with( - 'Could not create ceph keyring: peer not ready?' + 'Could not create ceph keyring for nova-compute: peer not ready?' ) + self.ensure_ceph_keyring.assert_called_once_with( + service='nova-compute', user='nova', group='nova') @patch.object(hooks, '_handle_ceph_request') @patch.object(hooks, 'create_libvirt_secret') @@ -633,26 +637,64 @@ def test_ceph_changed_with_key_and_relation_data(self, configs, _handle_ceph_request): configs.complete_contexts = MagicMock() configs.complete_contexts.return_value = ['ceph'] + self.sent_ceph_application_name.return_value = 'nova-compute-kvm' + service_name.return_value = 'nova-compute-kvm' self.ensure_ceph_keyring.return_value = True configs.write = MagicMock() - service_name.return_value = 'nova-compute' key = {'data': 'key'} self.relation_get.return_value = key hooks.ceph_changed() + self.send_application_name.assert_called_once_with( + relid=None, app_name=hooks.CEPH_AUTH_CRED_NAME) ex = [ - call('/var/lib/charm/nova-compute/ceph.conf'), + call('/var/lib/charm/nova-compute-kvm/ceph.conf'), call('/etc/ceph/secret.xml'), call('/etc/nova/nova.conf'), ] self.assertEqual(ex, configs.write.call_args_list) create_libvirt_secret.assert_called_once_with( secret_file='/etc/ceph/secret.xml', key=key, - secret_uuid=hooks.CEPH_SECRET_UUID) + secret_uuid=hooks.CEPH_OLD_SECRET_UUID) # confirm exception is caught _handle_ceph_request.side_effect = ValueError hooks.ceph_changed() - self.log.assert_called_once() + self.log.assert_has_calls([ + call('Sending application name for new ceph credentials after' + ' setting up the old credentials'), + call('Caught ValueError, invalid value provided' + ' for configuration?: ""', level='WARNING') + ]) + + @patch.object(hooks, '_handle_ceph_request') + @patch.object(hooks, 'create_libvirt_secret') + @patch('nova_compute_context.service_name') + @patch.object(hooks, 'CONFIGS') + def test_ceph_changed_with_key_and_relation_data_new_ceph_creds( + self, configs, service_name, create_libvirt_secret, + _handle_ceph_request): + configs.complete_contexts = MagicMock() + configs.complete_contexts.return_value = ['ceph'] + self.sent_ceph_application_name.return_value = ( + hooks.CEPH_AUTH_CRED_NAME) + service_name.return_value = 'nova-compute-kvm' + self.ensure_ceph_keyring.return_value = True + configs.write = MagicMock() + key = {'data': 'key'} + self.relation_get.return_value = key + + hooks.ceph_changed() + self.send_application_name.assert_not_called() + ex = [ + call('/var/lib/charm/nova-compute-kvm/ceph.conf'), + call('/etc/ceph/secret.xml'), + call('/etc/nova/nova.conf'), + ] + self.assertEqual(ex, configs.write.call_args_list) + create_libvirt_secret.assert_called_once_with( + secret_file='/etc/ceph/secret.xml', key=key, + secret_uuid=hooks.CEPH_SECRET_UUID) + self.log.assert_not_called() @patch.object(hooks, 'get_ceph_request') @patch.object(hooks, 'get_request_states') @@ -1299,7 +1341,37 @@ def test_upgrade_charm(self, getgrnam): grp_mock.gr_gid = None getgrnam.return_value = grp_mock self.remove_old_packages.return_value = False + self.sent_ceph_application_name.return_value = 'nova-compute' + hooks.upgrade_charm() + self.send_application_name.assert_not_called() + self.remove_old_packages.assert_called_once_with() + self.assertFalse(self.service_restart.called) + + @patch.object(hooks.grp, 'getgrnam') + def test_upgrade_charm_send_app_name(self, getgrnam): + grp_mock = MagicMock() + grp_mock.gr_gid = None + getgrnam.return_value = grp_mock + self.remove_old_packages.return_value = False + self.sent_ceph_application_name.return_value = 'nova-compute-kvm' + self.relation_ids.return_value = ['ceph:0'] + hooks.upgrade_charm() + self.send_application_name.assert_called_once_with( + relid='ceph:0', app_name=hooks.CEPH_AUTH_CRED_NAME) + self.remove_old_packages.assert_called_once_with() + self.assertFalse(self.service_restart.called) + + @patch.object(hooks.grp, 'getgrnam') + def test_upgrade_charm_send_app_name_2(self, getgrnam): + grp_mock = MagicMock() + grp_mock.gr_gid = None + getgrnam.return_value = grp_mock + self.remove_old_packages.return_value = False + self.sent_ceph_application_name.return_value = 'nova-compute' + self.relation_ids.return_value = ['ceph:0'] hooks.upgrade_charm() + self.send_application_name.assert_called_once_with( + relid='ceph:0', app_name=hooks.CEPH_AUTH_CRED_NAME) self.remove_old_packages.assert_called_once_with() self.assertFalse(self.service_restart.called)