Skip to content

Commit

Permalink
Improvements to further stablise the manila-ganesha tests
Browse files Browse the repository at this point in the history
This improves the manila-ganesha tests by a) checking that ceph is
stable/healthy, and b) ensuring that after the restart of
manila-ganesha, it is stable/healthy.
  • Loading branch information
ajkavanagh committed Jul 16, 2024
1 parent 1e2a8a1 commit 3873dd5
Show file tree
Hide file tree
Showing 5 changed files with 144 additions and 9 deletions.
11 changes: 9 additions & 2 deletions zaza/openstack/charm_tests/ceilometer_agent/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,24 +69,31 @@ def test_400_gnocchi_metrics(self):

expected_metric_names = self.__get_expected_metric_names(
current_os_release)
logging.info("Expected metric names: %s",
', '.join(sorted(expected_metric_names)))

min_timeout_seconds = 500
polling_interval_seconds = (
polling_interval_seconds = int(
openstack_utils.get_application_config_option(
self.application_name, 'polling-interval'))
self.application_name, 'polling-interval') or 30)
timeout_seconds = max(10 * polling_interval_seconds,
min_timeout_seconds)
logging.info('Giving ceilometer-agent {}s to publish all metrics to '
'gnocchi...'.format(timeout_seconds))

max_time = time.time() + timeout_seconds
while time.time() < max_time:
logging.info("... Looking:")
found_metric_names = {metric['name']
for metric in gnocchi.metric.list()}
logging.info("... found metric names: %s",
', '.join(sorted(found_metric_names)))
missing_metric_names = expected_metric_names - found_metric_names
if len(missing_metric_names) == 0:
logging.info('All expected metrics found.')
break
logging.info("... still missing: %s",
', '.join(sorted(missing_metric_names)))
time.sleep(polling_interval_seconds)

unexpected_found_metric_names = (
Expand Down
56 changes: 56 additions & 0 deletions zaza/openstack/charm_tests/manila/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,35 @@ def _mount_share_on_instance(self, instance_ip, ssh_user_name,
command=ssh_cmd,
verify=verify_status)

def _umount_share_on_instance(self, instance_ip, ssh_user_name,
                              ssh_private_key, share_path):
    """Unmount the share from a Nova instance.

    The umount command is executed via SSH on the instance and retried
    (up to 5 attempts, exponential backoff) to ride out transient SSH
    or NFS issues while the share is still busy.

    :param instance_ip: IP of the Nova instance.
    :type instance_ip: string
    :param ssh_user_name: SSH user name.
    :type ssh_user_name: string
    :param ssh_private_key: SSH private key.
    :type ssh_private_key: string
    :param share_path: share network path.  Currently unused; the mount
        point is taken from ``self.mount_dir``.  Kept for signature
        symmetry with :meth:`_mount_share_on_instance`.
    :type share_path: string
    """
    # Unmount by mount point rather than by remote path; the instance
    # only ever mounts the share at self.mount_dir.
    ssh_cmd = 'sudo umount {mount_dir}'.format(mount_dir=self.mount_dir)

    for attempt in tenacity.Retrying(
            stop=tenacity.stop_after_attempt(5),
            wait=tenacity.wait_exponential(multiplier=3, min=2, max=10)):
        with attempt:
            openstack_utils.ssh_command(
                vm_name="instance-{}".format(instance_ip),
                ip=instance_ip,
                username=ssh_user_name,
                privkey=ssh_private_key,
                command=ssh_cmd,
                verify=verify_status)

@tenacity.retry(
stop=tenacity.stop_after_attempt(5),
wait=tenacity.wait_exponential(multiplier=3, min=2, max=10))
Expand Down Expand Up @@ -323,6 +352,23 @@ def _restart_share_instance(self):
"""
return False

def _wait_for_ceph_healthy(self):
    """Wait until the ceph health is healthy.

    Runs ``sudo ceph health`` on the ceph-mon leader, retrying every
    5 seconds for up to 10 attempts; the final failure is re-raised.
    """
    logging.info("Waiting for ceph to be healthy")
    retryer = tenacity.Retrying(
        wait=tenacity.wait_fixed(5),
        stop=tenacity.stop_after_attempt(10),
        reraise=True,
    )
    for attempt in retryer:
        logging.info("... testing Ceph")
        with attempt:
            result = zaza.model.run_on_leader("ceph-mon", "sudo ceph health")
            # NOTE(review): this asserts on the command's exit code only;
            # confirm that a warning state fails the command as intended.
            self.assertEqual(result["Code"], "0")
    logging.info("...Ceph is healthy")

def test_manila_share(self):
"""Test that a Manila share can be accessed on two instances.
Expand All @@ -346,6 +392,10 @@ def test_manila_share(self):
fip_1 = neutron_tests.floating_ips_from_instance(instance_1)[0]
fip_2 = neutron_tests.floating_ips_from_instance(instance_2)[0]

# force a restart to clear out any clients that may be hanging around
# due to restarts on manila-ganesha during deployment.
self._restart_share_instance()
self._wait_for_ceph_healthy()
# Create a share
share = self.manila_client.shares.create(
share_type=self.share_type_name,
Expand Down Expand Up @@ -403,3 +453,9 @@ def test_manila_share(self):
fip_2, ssh_user_name, privkey, share_path)
self._validate_testing_file_from_instance(
fip_2, ssh_user_name, privkey)

# now umount the share on each instance to allow cleaning up.
self._umount_share_on_instance(
fip_1, ssh_user_name, privkey, share_path)
self._umount_share_on_instance(
fip_2, ssh_user_name, privkey, share_path)
75 changes: 73 additions & 2 deletions zaza/openstack/charm_tests/manila_ganesha/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
MANILA_GANESHA_TYPE_NAME,
)

from zaza import sync_wrapper
import zaza.openstack.utilities.generic as generic_utils
import zaza.openstack.charm_tests.manila.tests as manila_tests
import zaza.model
Expand Down Expand Up @@ -54,29 +55,99 @@ def _restart_share_instance(self):
self.model_name,
ganeshas))
for ganesha in ganeshas:
ganesha_unit = zaza.model.get_units(ganesha)[0]
units = zaza.model.get_units(ganesha)
ganesha_unit = units[0]
hacluster_unit = zaza_utils_juju.get_subordinate_units(
[ganesha_unit.entity_id],
charm_name='hacluster')
logging.info('Ganesha in hacluster mode: {}'.format(
bool(hacluster_unit)))

for unit in zaza.model.get_units(ganesha):
for unit in units:
if hacluster_unit:
# While we really only need to run this on the machine
# hosting nfs-ganesha and manila-share, running it
# everywhere isn't harmful. Pacemaker handles restarting
# the services
logging.info(
"For %s, running systemctl stop manila-share "
"nfs-ganesha", unit.entity_id)
zaza.model.run_on_unit(
unit.entity_id,
"systemctl stop manila-share nfs-ganesha")
else:
logging.info(
"For %s, running systemctl restart manila-share "
"nfs-ganesha", unit.entity_id)
zaza.model.run_on_unit(
unit.entity_id,
"systemctl restart manila-share nfs-ganesha")

if hacluster_unit:
# now ensure that at least one manila-share and nfs-ganesha is
# at least running.
unit_names = [unit.entity_id for unit in units]
logging.info(
"Blocking until at least one manila-share is running")
self._block_until_at_least_one_unit_running_services(
unit_names, ['manila-share'])
else:
# block until they are all running.
for unit in units:
zaza.model.block_until_service_status(
unit_name=unit.entity_id,
services=['manila-share'],
target_status='running'
)

return True

@staticmethod
def _block_until_at_least_one_unit_running_services(
        units, services, model_name=None, timeout=None):
    """Block until at least one unit is running all the provided services.

    Each unit is probed (via ``pidof -x``) for every service; the wait
    ends as soon as any single unit has every service running.

    :param units: List of names of unit to run action on
    :type units: List[str]
    :param services: List of services to check
    :type services: List[str]
    :param model_name: Name of the model to query; None for the current
        model.
    :type model_name: Optional[str]
    :param timeout: Time to wait (seconds) for the condition, also passed
        to each remote command run.
    :type timeout: Optional[float]
    """
    async def _check_services():
        for unit_name in units:
            running_services = {}
            for service in services:
                command = r"pidof -x '{}'".format(service)
                out = await zaza.model.async_run_on_unit(
                    unit_name,
                    command,
                    model_name=model_name,
                    timeout=timeout)
                # pidof prints nothing when no matching process exists,
                # so empty stripped stdout means NOT running.
                running_services[service] = bool(out['Stdout'].strip())
            states = ', '.join('{}: {}'.format(k, v)
                               for k, v in
                               running_services.items())
            # Note this blocks the async call, but we don't really care as
            # it should only be a short time.
            logging.info('For unit {unit}, services: {states}'
                         .format(unit=unit_name, states=states))
            # Use all() over the per-service results; the previous
            # length comparison broke when `services` contained
            # duplicate names (the dict de-duplicates them, so the
            # counts could never match).
            if all(running_services.values()):
                # all services are running on this unit
                return True
        # No unit has all services running
        return False

    async def _await_block():
        await zaza.model.async_block_until(
            _check_services, timeout=timeout)

    sync_wrapper(_await_block)()

def _run_nrpe_check_command(self, commands):
try:
zaza.model.get_application("nrpe")
Expand Down
1 change: 0 additions & 1 deletion zaza/openstack/utilities/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,6 @@ def __init__(self, obj, num_retries=3, initial_interval=5.0, backoff=1.0,
'retry_exceptions': retry_exceptions,
'log': _log,
}
_log(f"ObjectRetrierWraps: wrapping {self.__obj}")

def __getattr__(self, name):
"""Get attribute; delegates to wrapped object."""
Expand Down
10 changes: 6 additions & 4 deletions zaza/openstack/utilities/openstack.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@
from zaza.openstack.utilities import (
exceptions,
generic as generic_utils,
ObjectRetrierWraps,
retry_on_connect_failure,
)
import zaza.utilities.networking as network_utils

Expand Down Expand Up @@ -385,7 +385,7 @@ def get_nova_session_client(session, version=None):
"""
if not version:
version = 2
return ObjectRetrierWraps(
return retry_on_connect_failure(
novaclient_client.Client(version, session=session))


Expand Down Expand Up @@ -2323,7 +2323,9 @@ def get_remote_ca_cert_file(application, model_name=None):
model.scp_from_unit(
unit,
cert_file,
_tmp_ca.name)
_tmp_ca.name,
scp_opts='-q',
)
except JujuError:
continue
# ensure that the path to put the local cacert in actually exists.
Expand Down Expand Up @@ -2565,7 +2567,7 @@ def resource_removed(resource,
msg='resource',
wait_exponential_multiplier=1,
wait_iteration_max_time=60,
stop_after_attempt=8):
stop_after_attempt=30):
"""Wait for an openstack resource to no longer be present.
:param resource: pointer to os resource type, ex: heat_client.stacks
Expand Down

0 comments on commit 3873dd5

Please sign in to comment.