Skip to content

Commit

Permalink
[v2] Fix migration across nova-compute apps using ceph
Browse files Browse the repository at this point in the history
This change reworks previous changes [1] and [2] that had
been respectively reverted and abandoned.

When using the config libvirt-image-backend=rbd, VMs
created from image have their disk data stored in ceph
instead of the compute node itself.

When performing live-migrations, both nodes need to
access the same ceph credentials to access the VM's
disk in ceph, but this is currently not possible
if the nodes involved pertain to different
nova-compute charm apps.

This patch changes app name sent to ceph to
'nova-compute-ceph-auth-c91ce26f', a unique name common to
all nova-compute apps, allowing all nova-compute apps to
use the same ceph auth.

This change also ensures newly deployed nodes install
the old credentials first on ceph-joined hook,
and then supercedes it with the new credentials
on ceph-changed hook, therefore also retaining
the old credentials.

This patch also includes the charmhelpers sync
from PR: #840

[1] https://review.opendev.org/889642
[2] https://review.opendev.org/896155

Closes-bug: #2028559
Related-bug: #2037003

Func-Test-Pr: openstack-charmers/zaza-openstack-tests#1149

Change-Id: I1ae12d787a1f8e7761ca06b5a80049c1c62e9e90
  • Loading branch information
rodrigogansobarbieri committed Jan 5, 2024
1 parent 4d6f4c0 commit 650f3a5
Show file tree
Hide file tree
Showing 5 changed files with 199 additions and 19 deletions.
8 changes: 6 additions & 2 deletions hooks/charmhelpers/contrib/storage/linux/ceph.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,15 +158,19 @@ def get_osd_settings(relation_name):
return _order_dict_by_key(osd_settings)


def send_application_name(relid=None):
def send_application_name(relid=None, app_name=None):
"""Send the application name down the relation.
:param relid: Relation id to set application name in.
:type relid: str
:param app_name: Application name to send in the relation.
:type app_name: str
"""
if app_name is None:
app_name = application_name()
relation_set(
relation_id=relid,
relation_settings={'application-name': application_name()})
relation_settings={'application-name': app_name})


def send_osd_settings():
Expand Down
32 changes: 27 additions & 5 deletions hooks/nova_compute_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
from charmhelpers.fetch import apt_install, filter_installed_packages
from charmhelpers.core.hookenv import (
config,
local_unit,
log,
relation_get,
relation_ids,
Expand All @@ -62,8 +63,14 @@
)

# This is just a label and it must be consistent across
# nova-compute nodes to support live migration.
CEPH_SECRET_UUID = '514c9fca-8cbe-11e2-9c52-3bc8c7819472'
# nova-compute nodes to support live migration. It needs to
# change whenever CEPH_SECRET_UUID also changes.
CEPH_AUTH_CRED_NAME = 'nova-compute-ceph-auth-c91ce26f'
CEPH_SECRET_UUID = 'c91ce26f-403d-4058-9c38-6b56e1c428e0'
# We keep the old secret to retain old credentials and support
# live-migrations between existing instances to newly deployed
# nodes. For more info see LP#2037003.
CEPH_OLD_SECRET_UUID = '514c9fca-8cbe-11e2-9c52-3bc8c7819472'

OVS_BRIDGE = 'br-int'

Expand Down Expand Up @@ -115,6 +122,16 @@ def get_availability_zone():
else config('default-availability-zone'))


def sent_ceph_application_name():
app_name = None
for rid in relation_ids('ceph'):
app_name = relation_get(
'application-name', rid=rid, unit=local_unit())
# default to the old name, so it goes through the old path during
# ceph relation set up
return app_name or service_name()


def _neutron_security_groups():
'''
Inspects current cloud-compute relation and determine if nova-c-c has
Expand Down Expand Up @@ -435,13 +452,18 @@ def __call__(self):
ctxt = super(NovaComputeCephContext, self).__call__()
if not ctxt:
return {}
svc = service_name()
svc = sent_ceph_application_name()
if svc == CEPH_AUTH_CRED_NAME:
secret_uuid = CEPH_SECRET_UUID
else:
secret_uuid = CEPH_OLD_SECRET_UUID

# secret.xml
ctxt['ceph_secret_uuid'] = CEPH_SECRET_UUID
ctxt['ceph_secret_uuid'] = secret_uuid
# nova.conf
ctxt['service_name'] = svc
ctxt['rbd_user'] = svc
ctxt['rbd_secret_uuid'] = CEPH_SECRET_UUID
ctxt['rbd_secret_uuid'] = secret_uuid

if config('pool-type') == 'erasure-coded':
ctxt['rbd_pool'] = (
Expand Down
39 changes: 32 additions & 7 deletions hooks/nova_compute_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
log,
DEBUG,
INFO,
WARNING,
ERROR,
relation_ids,
remote_service_name,
Expand Down Expand Up @@ -106,7 +107,8 @@
services,
register_configs,
NOVA_CONF,
ceph_config_file, CEPH_SECRET,
ceph_config_file,
CEPH_SECRET,
CEPH_BACKEND_SECRET,
enable_shell, disable_shell,
configure_lxd,
Expand Down Expand Up @@ -136,6 +138,9 @@

from nova_compute_context import (
nova_metadata_requirement,
sent_ceph_application_name,
CEPH_OLD_SECRET_UUID,
CEPH_AUTH_CRED_NAME,
CEPH_SECRET_UUID,
assert_libvirt_rbd_imagebackend_allowed,
get_availability_zone,
Expand Down Expand Up @@ -454,6 +459,7 @@ def ceph_joined():
# Bug 1427660
if not is_unit_paused_set() and config('virt-type') in LIBVIRT_TYPES:
service_restart(libvirt_daemon())
# install old credentials first for backwards compatibility
send_application_name()


Expand Down Expand Up @@ -566,9 +572,16 @@ def ceph_changed(rid=None, unit=None):
log('ceph relation incomplete. Peer not ready?')
return

if not ensure_ceph_keyring(service=service_name(), user='nova',
sent_app_name = sent_ceph_application_name()
if sent_app_name == CEPH_AUTH_CRED_NAME:
secret_uuid = CEPH_SECRET_UUID
else:
secret_uuid = CEPH_OLD_SECRET_UUID

if not ensure_ceph_keyring(service=sent_app_name, user='nova',
group='nova'):
log('Could not create ceph keyring: peer not ready?')
log('Could not create ceph keyring '
'for {}: peer not ready?'.format(sent_app_name))
return

CONFIGS.write(ceph_config_file())
Expand All @@ -580,7 +593,12 @@ def ceph_changed(rid=None, unit=None):
key = relation_get(attribute='key', rid=rid, unit=unit)
if config('virt-type') in ['kvm', 'qemu', 'lxc'] and key:
create_libvirt_secret(secret_file=CEPH_SECRET,
secret_uuid=CEPH_SECRET_UUID, key=key)
secret_uuid=secret_uuid, key=key)

if sent_app_name != CEPH_AUTH_CRED_NAME:
log('Sending application name for new ceph credentials after '
'setting up the old credentials')
send_application_name(relid=rid, app_name=CEPH_AUTH_CRED_NAME)

try:
_handle_ceph_request()
Expand All @@ -591,7 +609,7 @@ def ceph_changed(rid=None, unit=None):
# the hook execution.
log('Caught ValueError, invalid value provided for '
'configuration?: "{}"'.format(str(e)),
level=DEBUG)
level=WARNING)


# TODO: Refactor this method moving part of this logic to charmhelpers,
Expand Down Expand Up @@ -691,8 +709,11 @@ def _get_broker_rid_unit_for_previous_request():

@hooks.hook('ceph-relation-broken')
def ceph_broken():
service = service_name()
delete_keyring(service=service)
delete_keyring(service=CEPH_AUTH_CRED_NAME)
# cleanup old entries based on application name
svc_name = service_name()
if svc_name != CEPH_AUTH_CRED_NAME:
delete_keyring(service=svc_name)
update_all_configs()


Expand All @@ -718,6 +739,10 @@ def upgrade_charm():

for r_id in relation_ids('amqp'):
amqp_joined(relation_id=r_id)
# Trigger upgrade-path from application_name to 'nova-compute'
if sent_ceph_application_name() != CEPH_AUTH_CRED_NAME:
for r_id in relation_ids('ceph'):
send_application_name(relid=r_id, app_name=CEPH_AUTH_CRED_NAME)

if is_relation_made('nrpe-external-master'):
update_nrpe_config()
Expand Down
57 changes: 57 additions & 0 deletions unit_tests/test_nova_compute_contexts.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
'relation_ids',
'relation_get',
'related_units',
'service_name',
'config',
'log',
'_save_flag_file',
Expand Down Expand Up @@ -1024,6 +1025,25 @@ def test_nova_metadata_requirement(self):
self.assertEqual(context.nova_metadata_requirement(),
(True, None))

@patch('nova_compute_context.local_unit')
def test_sent_ceph_application_name_empty_rel_data_default(
self, local_unit):
local_unit.return_value = 'nova-compute-kvm/0'
self.relation_ids.return_value = ['ceph:0']
self.test_relation.set({})
self.service_name.return_value = 'nova-compute-kvm'
self.assertEqual(
context.sent_ceph_application_name(), 'nova-compute-kvm')

@patch('nova_compute_context.local_unit')
def test_sent_ceph_application_name(self, local_unit):
local_unit.return_value = 'nova-compute-kvm/0'
self.relation_ids.return_value = ['ceph:0']
self.test_relation.set({'application-name': 'ceph-nova-compute'})
self.service_name.return_value = 'nova-compute-kvm'
self.assertEqual(
context.sent_ceph_application_name(), 'ceph-nova-compute')

def test_nova_compute_extra_flags(self):
self.test_config.set('cpu-model-extra-flags', 'pcid vmx pdpe1gb')
self.lsb_release.return_value = {'DISTRIB_CODENAME': 'bionic'}
Expand Down Expand Up @@ -1387,6 +1407,43 @@ def setUp(self):
self.config.side_effect = self.test_config.get
self.os_release.return_value = 'queens'

@patch('nova_compute_context.sent_ceph_application_name')
@patch('charmhelpers.contrib.openstack.context.CephContext.__call__')
def test_secret_uuid_old(self, mock_call, sent_name):
sent_name.return_value = 'nova-compute-kvm'
mock_call.return_value = {'foo': 'bar'}
ctxt = context.NovaComputeCephContext()()
self.assertEqual(
context.CEPH_OLD_SECRET_UUID, ctxt['ceph_secret_uuid'])
self.assertEqual(
context.CEPH_OLD_SECRET_UUID, ctxt['rbd_secret_uuid'])
self.assertEqual('nova-compute-kvm', ctxt['service_name'])
self.assertEqual('nova-compute-kvm', ctxt['rbd_user'])

@patch('nova_compute_context.sent_ceph_application_name')
@patch('charmhelpers.contrib.openstack.context.CephContext.__call__')
def test_secret_uuid_old_2(self, mock_call, sent_name):
sent_name.return_value = 'nova-compute'
mock_call.return_value = {'foo': 'bar'}
ctxt = context.NovaComputeCephContext()()
self.assertEqual(
context.CEPH_OLD_SECRET_UUID, ctxt['ceph_secret_uuid'])
self.assertEqual(
context.CEPH_OLD_SECRET_UUID, ctxt['rbd_secret_uuid'])
self.assertEqual('nova-compute', ctxt['service_name'])
self.assertEqual('nova-compute', ctxt['rbd_user'])

@patch('nova_compute_context.sent_ceph_application_name')
@patch('charmhelpers.contrib.openstack.context.CephContext.__call__')
def test_secret_uuid_new(self, mock_call, sent_name):
sent_name.return_value = context.CEPH_AUTH_CRED_NAME
mock_call.return_value = {'foo': 'bar'}
ctxt = context.NovaComputeCephContext()()
self.assertEqual(context.CEPH_SECRET_UUID, ctxt['ceph_secret_uuid'])
self.assertEqual(context.CEPH_SECRET_UUID, ctxt['rbd_secret_uuid'])
self.assertEqual(context.CEPH_AUTH_CRED_NAME, ctxt['service_name'])
self.assertEqual(context.CEPH_AUTH_CRED_NAME, ctxt['rbd_user'])

@patch.dict(context.os.environ, {'JUJU_UNIT_NAME': 'nova_compute'},
clear=True)
@patch('charmhelpers.contrib.openstack.context.CephContext.__call__')
Expand Down
82 changes: 77 additions & 5 deletions unit_tests/test_nova_compute_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@
'get_relation_ip',
# nova_compute_context
'nova_metadata_requirement',
'sent_ceph_application_name',
# nova_compute_utils
# 'PACKAGES',
'create_libvirt_secret',
Expand Down Expand Up @@ -618,10 +619,13 @@ def test_ceph_changed_no_keyring(self, configs):
configs.complete_contexts = MagicMock()
configs.complete_contexts.return_value = ['ceph']
self.ensure_ceph_keyring.return_value = False
self.sent_ceph_application_name.return_value = 'nova-compute'
hooks.ceph_changed()
self.log.assert_called_with(
'Could not create ceph keyring: peer not ready?'
'Could not create ceph keyring for nova-compute: peer not ready?'
)
self.ensure_ceph_keyring.assert_called_once_with(
service='nova-compute', user='nova', group='nova')

@patch.object(hooks, '_handle_ceph_request')
@patch.object(hooks, 'create_libvirt_secret')
Expand All @@ -633,26 +637,64 @@ def test_ceph_changed_with_key_and_relation_data(self, configs,
_handle_ceph_request):
configs.complete_contexts = MagicMock()
configs.complete_contexts.return_value = ['ceph']
self.sent_ceph_application_name.return_value = 'nova-compute-kvm'
service_name.return_value = 'nova-compute-kvm'
self.ensure_ceph_keyring.return_value = True
configs.write = MagicMock()
service_name.return_value = 'nova-compute'
key = {'data': 'key'}
self.relation_get.return_value = key

hooks.ceph_changed()
self.send_application_name.assert_called_once_with(
relid=None, app_name=hooks.CEPH_AUTH_CRED_NAME)
ex = [
call('/var/lib/charm/nova-compute/ceph.conf'),
call('/var/lib/charm/nova-compute-kvm/ceph.conf'),
call('/etc/ceph/secret.xml'),
call('/etc/nova/nova.conf'),
]
self.assertEqual(ex, configs.write.call_args_list)
create_libvirt_secret.assert_called_once_with(
secret_file='/etc/ceph/secret.xml', key=key,
secret_uuid=hooks.CEPH_SECRET_UUID)
secret_uuid=hooks.CEPH_OLD_SECRET_UUID)
# confirm exception is caught
_handle_ceph_request.side_effect = ValueError
hooks.ceph_changed()
self.log.assert_called_once()
self.log.assert_has_calls([
call('Sending application name for new ceph credentials after'
' setting up the old credentials'),
call('Caught ValueError, invalid value provided'
' for configuration?: ""', level='WARNING')
])

@patch.object(hooks, '_handle_ceph_request')
@patch.object(hooks, 'create_libvirt_secret')
@patch('nova_compute_context.service_name')
@patch.object(hooks, 'CONFIGS')
def test_ceph_changed_with_key_and_relation_data_new_ceph_creds(
self, configs, service_name, create_libvirt_secret,
_handle_ceph_request):
configs.complete_contexts = MagicMock()
configs.complete_contexts.return_value = ['ceph']
self.sent_ceph_application_name.return_value = (
hooks.CEPH_AUTH_CRED_NAME)
service_name.return_value = 'nova-compute-kvm'
self.ensure_ceph_keyring.return_value = True
configs.write = MagicMock()
key = {'data': 'key'}
self.relation_get.return_value = key

hooks.ceph_changed()
self.send_application_name.assert_not_called()
ex = [
call('/var/lib/charm/nova-compute-kvm/ceph.conf'),
call('/etc/ceph/secret.xml'),
call('/etc/nova/nova.conf'),
]
self.assertEqual(ex, configs.write.call_args_list)
create_libvirt_secret.assert_called_once_with(
secret_file='/etc/ceph/secret.xml', key=key,
secret_uuid=hooks.CEPH_SECRET_UUID)
self.log.assert_not_called()

@patch.object(hooks, 'get_ceph_request')
@patch.object(hooks, 'get_request_states')
Expand Down Expand Up @@ -1299,7 +1341,37 @@ def test_upgrade_charm(self, getgrnam):
grp_mock.gr_gid = None
getgrnam.return_value = grp_mock
self.remove_old_packages.return_value = False
self.sent_ceph_application_name.return_value = 'nova-compute'
hooks.upgrade_charm()
self.send_application_name.assert_not_called()
self.remove_old_packages.assert_called_once_with()
self.assertFalse(self.service_restart.called)

@patch.object(hooks.grp, 'getgrnam')
def test_upgrade_charm_send_app_name(self, getgrnam):
grp_mock = MagicMock()
grp_mock.gr_gid = None
getgrnam.return_value = grp_mock
self.remove_old_packages.return_value = False
self.sent_ceph_application_name.return_value = 'nova-compute-kvm'
self.relation_ids.return_value = ['ceph:0']
hooks.upgrade_charm()
self.send_application_name.assert_called_once_with(
relid='ceph:0', app_name=hooks.CEPH_AUTH_CRED_NAME)
self.remove_old_packages.assert_called_once_with()
self.assertFalse(self.service_restart.called)

@patch.object(hooks.grp, 'getgrnam')
def test_upgrade_charm_send_app_name_2(self, getgrnam):
grp_mock = MagicMock()
grp_mock.gr_gid = None
getgrnam.return_value = grp_mock
self.remove_old_packages.return_value = False
self.sent_ceph_application_name.return_value = 'nova-compute'
self.relation_ids.return_value = ['ceph:0']
hooks.upgrade_charm()
self.send_application_name.assert_called_once_with(
relid='ceph:0', app_name=hooks.CEPH_AUTH_CRED_NAME)
self.remove_old_packages.assert_called_once_with()
self.assertFalse(self.service_restart.called)

Expand Down

0 comments on commit 650f3a5

Please sign in to comment.