From f1d9d3710ea37bf1ff03ebad2d91446e9968304b Mon Sep 17 00:00:00 2001
From: phvalguima <pedro.guimaraes@canonical.com>
Date: Fri, 1 Mar 2024 17:17:21 +0100
Subject: [PATCH] [DPE-3573][DPE-3579] Fix #164 - check _cluster/settings
 instead of opensearch.yml for plugins (#177)

Adds a GET /_cluster/settings, which returns the configuration of the
cluster at the moment, including defaults. This way, plugins can check
if they should add their configs and request a restart during the start
cycle of the charm.

The `_is_enabled` is also extended to use the cluster settings instead
of checking the configuration files directly.

Moved `OpenSearchPluginConfig` to extend `pydantic.BaseModel`. It
ensures that configuration is translated to `Dict[str, str]`, in special
the boolean value, that must have a format of: `true` or `false`.

Also closes [DPE-3579](https://warthogs.atlassian.net/browse/DPE-3579):
move the plugin settings status to use the charm's `status.{set,clear}`
APIs.
---
 lib/charms/opensearch/v0/helper_cluster.py    |  17 +++
 .../opensearch/v0/opensearch_backups.py       |  13 +-
 .../opensearch/v0/opensearch_base_charm.py    |  19 +--
 .../v0/opensearch_plugin_manager.py           | 127 ++++++++++++------
 .../opensearch/v0/opensearch_plugins.py       |  58 ++++----
 .../v0/opensearch_relation_provider.py        |  22 +--
 .../integration/ha/test_large_deployments.py  |   4 +
 tests/integration/plugins/test_plugins.py     |  10 +-
 tests/unit/lib/test_backups.py                |  12 +-
 tests/unit/lib/test_helper_cluster.py         |  35 +++++
 tests/unit/lib/test_ml_plugins.py             |  14 +-
 tests/unit/lib/test_plugins.py                |  48 +++++--
 12 files changed, 261 insertions(+), 118 deletions(-)

diff --git a/lib/charms/opensearch/v0/helper_cluster.py b/lib/charms/opensearch/v0/helper_cluster.py
index af60ffa662..1b2baf779f 100644
--- a/lib/charms/opensearch/v0/helper_cluster.py
+++ b/lib/charms/opensearch/v0/helper_cluster.py
@@ -57,6 +57,23 @@ def suggest_roles(nodes: List[Node], planned_units: int) -> List[str]:
 
         return base_roles + ["cluster_manager"]
 
+    @staticmethod
+    def get_cluster_settings(
+        opensearch: OpenSearchDistribution,
+        host: Optional[str] = None,
+        alt_hosts: Optional[List[str]] = None,
+        include_defaults: bool = False,
+    ) -> Dict[str, any]:
+        """Get the cluster settings."""
+        settings = opensearch.request(
+            "GET",
+            f"/_cluster/settings?flat_settings=true&include_defaults={str(include_defaults).lower()}",
+            host=host,
+            alt_hosts=alt_hosts,
+        )
+
+        return dict(settings["defaults"] | settings["persistent"] | settings["transient"])
+
     @staticmethod
     def recompute_nodes_conf(app_name: str, nodes: List[Node]) -> Dict[str, Node]:
         """Recompute the configuration of all the nodes (cluster set to auto-generate roles)."""
diff --git a/lib/charms/opensearch/v0/opensearch_backups.py b/lib/charms/opensearch/v0/opensearch_backups.py
index 7da706816d..47267e1e5b 100644
--- a/lib/charms/opensearch/v0/opensearch_backups.py
+++ b/lib/charms/opensearch/v0/opensearch_backups.py
@@ -3,8 +3,8 @@
 
 """OpenSearch Backup.
 
-This file holds the implementation of the OpenSearchBackup, OpenSearchBackupPlugin classes
-as well as the configuration and state enum.
+This file holds the implementation of the OpenSearchBackup class, as well as the state enum
+and configuration.
 
 The OpenSearchBackup class listens to both relation changes from S3_RELATION and API calls
 and responses. The OpenSearchBackupPlugin holds the configuration info. The classes together
@@ -467,7 +467,7 @@ def _query_backup_status(self, backup_id=None) -> BackupServiceState:
             return BackupServiceState.RESPONSE_FAILED_NETWORK
         return self.get_service_status(output)
 
-    def _on_s3_credentials_changed(self, event: EventBase) -> None:
+    def _on_s3_credentials_changed(self, event: EventBase) -> None:  # noqa: C901
         """Calls the plugin manager config handler.
 
         This method will iterate over the s3 relation and check:
@@ -489,8 +489,14 @@ def _on_s3_credentials_changed(self, event: EventBase) -> None:
         self.charm.status.set(MaintenanceStatus(BackupSetupStart))
 
         try:
+            if not self.charm.plugin_manager.check_plugin_manager_ready():
+                raise OpenSearchNotFullyReadyError()
+
             plugin = self.charm.plugin_manager.get_plugin(OpenSearchBackupPlugin)
             if self.charm.plugin_manager.status(plugin) == PluginState.ENABLED:
+                # We need to explicitly disable the plugin before reconfiguration
+                # That happens because, differently from the actual configs, we cannot
+                # retrieve the key values and check if they changed.
                 self.charm.plugin_manager.apply_config(plugin.disable())
             self.charm.plugin_manager.apply_config(plugin.config())
         except OpenSearchError as e:
@@ -507,6 +513,7 @@ def _on_s3_credentials_changed(self, event: EventBase) -> None:
             PluginState.ENABLED,
             PluginState.WAITING_FOR_UPGRADE,
         ]:
+            logger.warning("_on_s3_credentials_changed: plugin is not enabled.")
             event.defer()
             return
 
diff --git a/lib/charms/opensearch/v0/opensearch_base_charm.py b/lib/charms/opensearch/v0/opensearch_base_charm.py
index 90b33e3dce..8bc29efe7c 100644
--- a/lib/charms/opensearch/v0/opensearch_base_charm.py
+++ b/lib/charms/opensearch/v0/opensearch_base_charm.py
@@ -493,22 +493,21 @@ def _on_config_changed(self, event: ConfigChangedEvent):
             event.defer()
             return
 
-        self.status.set(MaintenanceStatus(PluginConfigStart))
         try:
+            self.status.set(MaintenanceStatus(PluginConfigStart))
             if self.plugin_manager.run():
                 self.on[self.service_manager.name].acquire_lock.emit(
                     callback_override="_restart_opensearch"
                 )
-        except OpenSearchNotFullyReadyError:
-            logger.warning("Plugin management: cluster not ready yet at config changed")
-            event.defer()
-            return
-        except OpenSearchPluginError:
-            self.status.set(BlockedStatus(PluginConfigChangeError))
+            self.status.clear(PluginConfigStart)
+        except (OpenSearchNotFullyReadyError, OpenSearchPluginError) as e:
+            if isinstance(e, OpenSearchNotFullyReadyError):
+                logger.warning("Plugin management: cluster not ready yet at config changed")
+            else:
+                self.status.set(BlockedStatus(PluginConfigChangeError))
             event.defer()
             return
         self.status.clear(PluginConfigChangeError)
-        self.status.clear(PluginConfigStart)
 
     def _on_set_password_action(self, event: ActionEvent):
         """Set new admin password from user input or generate if not passed."""
@@ -619,7 +618,6 @@ def _start_opensearch(self, event: EventBase) -> None:  # noqa: C901
                 event.defer()
                 self.defer_trigger_event.emit()
             return
-
         if not self._can_service_start():
             self.peers_data.delete(Scope.UNIT, "starting")
             event.defer()
@@ -632,9 +630,7 @@ def _start_opensearch(self, event: EventBase) -> None:  # noqa: C901
             self.peers_data.delete(Scope.UNIT, "starting")
             event.defer()
             return
-
         self.unit.status = WaitingStatus(WaitingToStart)
-
         rel = self.model.get_relation(PeerRelationName)
         for unit in rel.units.union({self.unit}):
             if rel.data[unit].get("starting") == "True":
@@ -646,7 +642,6 @@ def _start_opensearch(self, event: EventBase) -> None:  # noqa: C901
         try:
             # Retrieve the nodes of the cluster, needed to configure this node
             nodes = self._get_nodes(False)
-
             # validate the roles prior to starting
             self.opensearch_peer_cm.validate_roles(nodes, on_new_unit=True)
 
diff --git a/lib/charms/opensearch/v0/opensearch_plugin_manager.py b/lib/charms/opensearch/v0/opensearch_plugin_manager.py
index 256c5c6b28..7dc737f130 100644
--- a/lib/charms/opensearch/v0/opensearch_plugin_manager.py
+++ b/lib/charms/opensearch/v0/opensearch_plugin_manager.py
@@ -10,9 +10,12 @@
 config-changed, upgrade, s3-credentials-changed, etc.
 """
 
+import copy
+import functools
 import logging
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Tuple
 
+from charms.opensearch.v0.helper_cluster import ClusterTopology
 from charms.opensearch.v0.opensearch_exceptions import (
     OpenSearchCmdError,
     OpenSearchNotFullyReadyError,
@@ -78,6 +81,11 @@ def __init__(self, charm):
         self._keystore = OpenSearchKeystore(self._charm)
         self._event_scope = OpenSearchPluginEventScope.DEFAULT
 
+    @functools.cached_property
+    def cluster_config(self):
+        """Returns the cluster configuration."""
+        return ClusterTopology.get_cluster_settings(self._charm.opensearch, include_defaults=True)
+
     def set_event_scope(self, event_scope: OpenSearchPluginEventScope) -> None:
         """Sets the event scope of the plugin manager.
 
@@ -129,25 +137,31 @@ def _extra_conf(self, plugin_data: Dict[str, Any]) -> Optional[Dict[str, Any]]:
             }
         return {**self._charm_config, "opensearch-version": self._opensearch.version}
 
+    def check_plugin_manager_ready(self) -> bool:
+        """Checks if the plugin manager is ready to run."""
+        return (
+            self._charm.opensearch.is_node_up()
+            and len(self._charm._get_nodes(True)) == self._charm.app.planned_units()
+            and self._charm.health.apply()
+            in [
+                HealthColors.GREEN,
+                HealthColors.YELLOW,
+            ]
+        )
+
     def run(self) -> bool:
         """Runs a check on each plugin: install, execute config changes or remove.
 
         This method should be called at config-changed event. Returns if needed restart.
         """
-        if not self._charm.opensearch.is_started() or self._charm.health.apply() not in [
-            HealthColors.GREEN,
-            HealthColors.YELLOW,
-        ]:
-            # If the health is not green, then raise a cluster-not-ready error
-            # The classes above should then defer their own events in waiting.
-            # Defer is important as next steps to configure plugins will involve
-            # calls to the APIs of the cluster.
-            logger.info("Cluster not ready, wait for the next event...")
+        if not self.check_plugin_manager_ready():
             raise OpenSearchNotFullyReadyError()
 
         err_msgs = []
         restart_needed = False
         for plugin in self.plugins:
+            logger.info(f"Checking plugin {plugin.name}...")
+            logger.debug(f"Status: {self.status(plugin)}")
             # These are independent plugins.
             # Any plugin that errors, if that is an OpenSearchPluginError, then
             # it is an "expected" error, such as missing additional config; should
@@ -172,8 +186,8 @@ def run(self) -> bool:
                 # This is a more serious issue, as we are missing some input from
                 # the user. The charm should block.
                 err_msgs.append(str(e))
-            except OpenSearchPluginError as e:
-                logger.error(f"Plugin {plugin.name} failed: {str(e)}")
+            logger.debug(f"Finished Plugin {plugin.name} status: {self.status(plugin)}")
+        logger.info(f"Plugin check finished, restart needed: {restart_needed}")
 
         if err_msgs:
             raise OpenSearchPluginError("\n".join(err_msgs))
@@ -255,6 +269,39 @@ def _disable_if_needed(self, plugin: OpenSearchPlugin) -> bool:
         except KeyError as e:
             raise OpenSearchPluginMissingConfigError(plugin.name, configs=[f"{e}"])
 
+    def _compute_settings(
+        self, config: OpenSearchPluginConfig
+    ) -> Tuple[Dict[str, str], Dict[str, str]]:
+        """Returns the current and the new configuration."""
+        current_settings = self.cluster_config
+        # We use current_settings and new_conf and check for any differences
+        # therefore, we need to make a deepcopy here before editing new_conf
+        new_conf = copy.deepcopy(current_settings)
+        for key_to_del in config.config_entries_to_del:
+            if key_to_del in new_conf:
+                del new_conf[key_to_del]
+        new_conf |= config.config_entries_to_add
+
+        logger.debug(
+            "Difference between current and new configuration: \n"
+            + str(
+                {
+                    "in current but not in new_conf": {
+                        k: v for k, v in current_settings.items() if k not in new_conf.keys()
+                    },
+                    "in new_conf but not in current": {
+                        k: v for k, v in new_conf.items() if k not in current_settings.keys()
+                    },
+                    "in both but different values": {
+                        k: v
+                        for k, v in new_conf.items()
+                        if k in current_settings.keys() and current_settings[k] != v
+                    },
+                }
+            )
+        )
+        return current_settings, new_conf
+
     def apply_config(self, config: OpenSearchPluginConfig) -> bool:
         """Runs the configuration changes as passed via OpenSearchPluginConfig.
 
@@ -262,27 +309,25 @@ def apply_config(self, config: OpenSearchPluginConfig) -> bool:
         1) Remove the entries to be deleted
         2) Add entries, if available
 
-        For example:
-            KNN needs to:
-            1) Remove from configuration: {"knn.plugin.enabled": "True"}
-            2) Add to configuration: {"knn.plugin.enabled": "False"}
-
         Returns True if a configuration change was performed.
         """
         self._keystore.delete(config.secret_entries_to_del)
         self._keystore.add(config.secret_entries_to_add)
-        # Add and remove configuration if applies
+        if config.secret_entries_to_del or config.secret_entries_to_add:
+            self._keystore.reload_keystore()
+
+        current_settings, new_conf = self._compute_settings(config)
+        if current_settings == new_conf:
+            # Nothing to do here
+            logger.info("apply_config: nothing to do, return")
+            return False
+
+        # Update the configuration
         if config.config_entries_to_del:
             self._opensearch_config.delete_plugin(config.config_entries_to_del)
-
         if config.config_entries_to_add:
             self._opensearch_config.add_plugin(config.config_entries_to_add)
-
-        if config.secret_entries_to_del or config.secret_entries_to_add:
-            self._keystore.reload_keystore()
-
-        # Return True if some configuration entries changed
-        return True if config.config_entries_to_add or config.config_entries_to_del else False
+        return True
 
     def status(self, plugin: OpenSearchPlugin) -> PluginState:
         """Returns the status for a given plugin."""
@@ -320,32 +365,28 @@ def _user_requested_to_enable(self, plugin: OpenSearchPlugin) -> bool:
     def _is_enabled(self, plugin: OpenSearchPlugin) -> bool:
         """Returns true if plugin is enabled.
 
-        The main question to answer is if we would remove the configuration from
-        opensearch.yaml and/or add some other configuration instead.
-        If yes, then we know that the service is enabled.
+        The main question to answer is if we would have it in the configuration
+        from cluster settings. If yes, then we know that the service is enabled.
 
-        The disable() method is used, as a given entry will be removed from opensearch.yml.
-
-        If disable() returns a list, then check if the keys in the stored_plugin_conf
-        matches the list values; otherwise, if a dict, then check if the keys and values match.
-        If they match in any of the cases above, then return True.
+        Check if the configuration from enable() is present or not.
         """
         try:
-            plugin_conf = plugin.disable().config_entries_to_del
-            keystore_conf = plugin.disable().secret_entries_to_del
-            stored_plugin_conf = self._opensearch_config.get_plugin(plugin_conf)
-
-            if any(k not in self._keystore.list() for k in keystore_conf):
+            current_settings, new_conf = self._compute_settings(plugin.config())
+            if current_settings != new_conf:
                 return False
 
-            # Using sets to guarantee matches; stored_plugin_conf will be a dict
-            if isinstance(plugin_conf, list):
-                return set(plugin_conf) == set(stored_plugin_conf)
-            else:  # plugin_conf is a dict
-                return plugin_conf == stored_plugin_conf
+            # Now, focus on the keystore part
+            keys_available = self._keystore.list()
+            keys_to_add = plugin.config().secret_entries_to_add
+            if any(k not in keys_available for k in keys_to_add):
+                return False
+            keys_to_del = plugin.config().secret_entries_to_del
+            if any(k in keys_available for k in keys_to_del):
+                return False
         except (KeyError, OpenSearchPluginError) as e:
             logger.warning(f"_is_enabled: error with {e}")
             return False
+        return True
 
     def _needs_upgrade(self, plugin: OpenSearchPlugin) -> bool:
         """Returns true if plugin needs upgrade."""
diff --git a/lib/charms/opensearch/v0/opensearch_plugins.py b/lib/charms/opensearch/v0/opensearch_plugins.py
index 52b333d9a5..1cf4c594e6 100644
--- a/lib/charms/opensearch/v0/opensearch_plugins.py
+++ b/lib/charms/opensearch/v0/opensearch_plugins.py
@@ -277,6 +277,7 @@ def _on_update_status(self, event):
 from charms.opensearch.v0.helper_enums import BaseStrEnum
 from charms.opensearch.v0.opensearch_exceptions import OpenSearchError
 from jproperties import Properties
+from pydantic import BaseModel, validator
 
 # The unique Charmhub library identifier, never change it
 LIBID = "3b05456c6e304680b4af8e20dae246a2"
@@ -331,20 +332,28 @@ class PluginState(BaseStrEnum):
     WAITING_FOR_UPGRADE = "waiting-for-upgrade"
 
 
-class OpenSearchPluginConfig:
-    """Represent the configuration of a plugin to be applied when configuring or disabling it."""
+class OpenSearchPluginConfig(BaseModel):
+    """Represent the configuration of a plugin to be applied when configuring or disabling it.
 
-    def __init__(
-        self,
-        config_entries_to_add: Optional[Dict[str, str]] = None,
-        config_entries_to_del: Optional[List[str]] = None,
-        secret_entries_to_add: Optional[Dict[str, str]] = None,
-        secret_entries_to_del: Optional[List[str]] = None,
-    ):
-        self.config_entries_to_add = config_entries_to_add or {}
-        self.config_entries_to_del = config_entries_to_del or []
-        self.secret_entries_to_add = secret_entries_to_add or {}
-        self.secret_entries_to_del = secret_entries_to_del or []
+    The config may receive any type of data, but will convert everything to strings and
+    pay attention to special types, such as booleans, which need to be "true" or "false".
+    """
+
+    config_entries_to_add: Optional[Dict[str, str]] = {}
+    config_entries_to_del: Optional[List[str]] = []
+    secret_entries_to_add: Optional[Dict[str, str]] = {}
+    secret_entries_to_del: Optional[List[str]] = []
+
+    @validator("config_entries_to_add", "secret_entries_to_add", allow_reuse=True, pre=True)
+    def convert_values_to_add(cls, conf) -> Dict[str, str]:  # noqa N805
+        """Converts the object to a dictionary.
+
+        Respects the conversion for boolean to {"true", "false"}.
+        """
+        return {
+            key: str(val).lower() if isinstance(val, bool) else str(val)
+            for key, val in conf.items()
+        }
 
 
 class OpenSearchPlugin:
@@ -389,9 +398,9 @@ def config(self) -> OpenSearchPluginConfig:
         Format:
         OpenSearchPluginConfig(
             config_entries_to_add = {...},
-            config_entries_to_del = [...]|{...},
+            config_entries_to_del = [...],
             secret_entries_to_add = {...},
-            secret_entries_to_del = [...]|{...},
+            secret_entries_to_del = [...],
         )
 
         May throw KeyError if accessing some source, such as self._extra_config, but the
@@ -406,9 +415,9 @@ def disable(self) -> OpenSearchPluginConfig:
         Format:
         OpenSearchPluginConfig(
             config_entries_to_add = {...},
-            config_entries_to_del = [...]|{...},
+            config_entries_to_del = [...],
             secret_entries_to_add = {...},
-            secret_entries_to_del = [...]|{...},
+            secret_entries_to_del = [...],
         )
 
         May throw KeyError if accessing some source, such as self._extra_config, but the
@@ -429,14 +438,12 @@ def config(self) -> OpenSearchPluginConfig:
         """Returns a plugin config object to be applied for enabling the current plugin."""
         return OpenSearchPluginConfig(
             config_entries_to_add={"knn.plugin.enabled": True},
-            config_entries_to_del={"knn.plugin.enabled": False},
         )
 
     def disable(self) -> OpenSearchPluginConfig:
         """Returns a plugin config object to be applied for disabling the current plugin."""
         return OpenSearchPluginConfig(
             config_entries_to_add={"knn.plugin.enabled": False},
-            config_entries_to_del={"knn.plugin.enabled": True},
         )
 
     @property
@@ -462,9 +469,9 @@ def config(self) -> OpenSearchPluginConfig:
         Format:
         OpenSearchPluginConfig(
             config_entries_to_add = {...},
-            config_entries_to_del = [...]|{...},
+            config_entries_to_del = [...],
             secret_entries_to_add = {...},
-            secret_entries_to_del = [...]|{...},
+            secret_entries_to_del = [...],
         )
         """
         if not self._extra_config.get("access-key") or not self._extra_config.get("secret-key"):
@@ -480,11 +487,6 @@ def config(self) -> OpenSearchPluginConfig:
             )
 
         return OpenSearchPluginConfig(
-            # Try to remove the previous values
-            secret_entries_to_del=[
-                "s3.client.default.access_key",
-                "s3.client.default.secret_key",
-            ],
             secret_entries_to_add={
                 # Remove any entries with None value
                 k: v
@@ -502,9 +504,9 @@ def disable(self) -> OpenSearchPluginConfig:
         Format:
         OpenSearchPluginConfig(
             config_entries_to_add = {...},
-            config_entries_to_del = [...]|{...},
+            config_entries_to_del = [...],
             secret_entries_to_add = {...},
-            secret_entries_to_del = [...]|{...},
+            secret_entries_to_del = [...],
         )
         """
         return OpenSearchPluginConfig(
diff --git a/lib/charms/opensearch/v0/opensearch_relation_provider.py b/lib/charms/opensearch/v0/opensearch_relation_provider.py
index 570d90f995..6fbbbf0680 100644
--- a/lib/charms/opensearch/v0/opensearch_relation_provider.py
+++ b/lib/charms/opensearch/v0/opensearch_relation_provider.py
@@ -328,18 +328,18 @@ def update_certs(self, relation_id, ca_chain=None):
         """
         if not self.unit.is_leader():
             # We're updating app databags in this function, so it can't be called on follower
-            # units.
+            # units. This is not checked in `set_tls_ca`, in data-interfaces.
             return
-        if not ca_chain:
-            try:
-                ca_chain = self.charm.secrets.get_object(Scope.APP, CertType.APP_ADMIN.val).get(
-                    "chain"
-                )
-            except AttributeError:
-                # cert doesn't exist - presumably we don't yet have a TLS relation.
-                return
-
-        self.opensearch_provides.set_tls_ca(relation_id, ca_chain)
+        try:
+            # If the ca_chain=None, then we try to load the CA from the app-admin secret.
+            _ch_chain = ca_chain or self.charm.secrets.get_object(
+                Scope.APP, CertType.APP_ADMIN.val
+            ).get("chain")
+        except AttributeError:
+            # cert doesn't exist - presumably we don't yet have a TLS relation.
+            logger.warning("unable to get ca_chain")
+            return
+        self.opensearch_provides.set_tls_ca(relation_id, _ch_chain)
 
     def _on_relation_changed(self, event: RelationChangedEvent) -> None:
         if not self.unit.is_leader():
diff --git a/tests/integration/ha/test_large_deployments.py b/tests/integration/ha/test_large_deployments.py
index 0eec7a8dcd..7326fb29ea 100644
--- a/tests/integration/ha/test_large_deployments.py
+++ b/tests/integration/ha/test_large_deployments.py
@@ -120,6 +120,8 @@ async def test_set_roles_manually(
         assert node.temperature is None, "Node temperature was erroneously set."
 
     # change cluster name and roles + temperature, should trigger a rolling restart
+
+    logger.info("Changing cluster name and roles + temperature.")
     await ops_test.model.applications[app].set_config(
         {"cluster_name": "new_cluster_name", "roles": "cluster_manager, data.cold"}
     )
@@ -131,6 +133,8 @@ async def test_set_roles_manually(
         wait_for_exact_units=len(nodes),
         idle_period=IDLE_PERIOD,
     )
+
+    logger.info("Checking if the cluster name and roles + temperature were changed.")
     assert await check_cluster_formation_successful(
         ops_test, leader_unit_ip, get_application_unit_names(ops_test, app=app)
     )
diff --git a/tests/integration/plugins/test_plugins.py b/tests/integration/plugins/test_plugins.py
index 1c9ec5aa23..da209abf6a 100644
--- a/tests/integration/plugins/test_plugins.py
+++ b/tests/integration/plugins/test_plugins.py
@@ -79,6 +79,7 @@ async def test_build_and_deploy(ops_test: OpsTest) -> None:
     assert len(ops_test.model.applications[APP_NAME].units) == 3
 
 
+@pytest.mark.abort_on_fail
 async def test_prometheus_exporter_enabled_by_default(ops_test):
     """Test that Prometheus Exporter is running before the relation is there."""
     leader_unit_ip = await get_leader_unit_ip(ops_test, app=APP_NAME)
@@ -90,6 +91,7 @@ async def test_prometheus_exporter_enabled_by_default(ops_test):
     assert len(response_str.split("\n")) > 500
 
 
+@pytest.mark.abort_on_fail
 async def test_prometheus_exporter_cos_relation(ops_test):
     await ops_test.model.deploy(COS_APP_NAME, channel="edge"),
     await ops_test.model.integrate(APP_NAME, COS_APP_NAME)
@@ -119,6 +121,7 @@ async def test_prometheus_exporter_cos_relation(ops_test):
     assert relation_data["scheme"] == "https"
 
 
+@pytest.mark.abort_on_fail
 async def test_monitoring_user_fetch_prometheus_data(ops_test):
     leader_unit_ip = await get_leader_unit_ip(ops_test, app=APP_NAME)
     endpoint = f"https://{leader_unit_ip}:9200/_prometheus/metrics"
@@ -139,6 +142,7 @@ async def test_monitoring_user_fetch_prometheus_data(ops_test):
     assert len(response_str.split("\n")) > 500
 
 
+@pytest.mark.abort_on_fail
 async def test_prometheus_monitor_user_password_change(ops_test):
     # Password change applied as expected
     leader_id = await get_leader_unit_id(ops_test, APP_NAME)
@@ -161,6 +165,7 @@ async def test_prometheus_monitor_user_password_change(ops_test):
     assert relation_data["password"] == new_password
 
 
+@pytest.mark.abort_on_fail
 async def test_knn_enabled_disabled(ops_test):
     config = await ops_test.model.applications[APP_NAME].get_config()
     assert config["plugin_opensearch_knn"]["default"] is True
@@ -177,6 +182,7 @@ async def test_knn_enabled_disabled(ops_test):
 
     config = await ops_test.model.applications[APP_NAME].get_config()
     assert config["plugin_opensearch_knn"]["value"] is True
+    await ops_test.model.wait_for_idle(apps=[APP_NAME], status="active", idle_period=45)
 
 
 @pytest.mark.abort_on_fail
@@ -218,7 +224,7 @@ async def test_knn_search_with_hnsw_faiss(ops_test: OpsTest) -> None:
     # Insert data in bulk
     await bulk_insert(ops_test, app, leader_unit_ip, payload)
     query = {"size": 2, "query": {"knn": {vector_name: {"vector": payload_list[0], "k": 2}}}}
-    docs = await search(ops_test, app, leader_unit_ip, index_name, query)
+    docs = await search(ops_test, app, leader_unit_ip, index_name, query, retries=30)
     assert len(docs) == 2
 
 
@@ -261,7 +267,7 @@ async def test_knn_search_with_hnsw_nmslib(ops_test: OpsTest) -> None:
     # Insert data in bulk
     await bulk_insert(ops_test, app, leader_unit_ip, payload)
     query = {"size": 2, "query": {"knn": {vector_name: {"vector": payload_list[0], "k": 2}}}}
-    docs = await search(ops_test, app, leader_unit_ip, index_name, query)
+    docs = await search(ops_test, app, leader_unit_ip, index_name, query, retries=30)
     assert len(docs) == 2
 
 
diff --git a/tests/unit/lib/test_backups.py b/tests/unit/lib/test_backups.py
index 6f09fad528..9c77b97cf1 100644
--- a/tests/unit/lib/test_backups.py
+++ b/tests/unit/lib/test_backups.py
@@ -490,12 +490,18 @@ def test_get_endpoint_protocol(self) -> None:
         assert self.charm.backup._get_endpoint_protocol("https://10.0.0.2:8000") == "https"
         assert self.charm.backup._get_endpoint_protocol("test.not-valid-url") == "https"
 
+    @patch(
+        "charms.opensearch.v0.opensearch_plugin_manager.OpenSearchPluginManager.check_plugin_manager_ready"
+    )
     @patch("charms.opensearch.v0.opensearch_plugin_manager.OpenSearchPluginManager.status")
     @patch("charms.opensearch.v0.opensearch_backups.OpenSearchBackup.apply_api_config_if_needed")
     @patch("charms.opensearch.v0.opensearch_plugin_manager.OpenSearchPluginManager.apply_config")
     @patch("charms.opensearch.v0.opensearch_distro.OpenSearchDistribution.version")
-    def test_00_update_relation_data(self, __, mock_apply_config, _, mock_status) -> None:
+    def test_00_update_relation_data(
+        self, __, mock_apply_config, _, mock_status, mock_pm_ready
+    ) -> None:
         """Tests if new relation without data returns."""
+        mock_pm_ready.return_value = True
         mock_status.return_value = PluginState.INSTALLED
         self.harness.update_relation_data(
             self.s3_rel_id,
@@ -513,10 +519,6 @@ def test_00_update_relation_data(self, __, mock_apply_config, _, mock_status) ->
         assert (
             mock_apply_config.call_args[0][0].__dict__
             == OpenSearchPluginConfig(
-                secret_entries_to_del=[
-                    "s3.client.default.access_key",
-                    "s3.client.default.secret_key",
-                ],
                 secret_entries_to_add={
                     "s3.client.default.access_key": "aaaa",
                     "s3.client.default.secret_key": "bbbb",
diff --git a/tests/unit/lib/test_helper_cluster.py b/tests/unit/lib/test_helper_cluster.py
index 9e1ba10abc..93eca237dd 100644
--- a/tests/unit/lib/test_helper_cluster.py
+++ b/tests/unit/lib/test_helper_cluster.py
@@ -241,3 +241,38 @@ def test_node_obj_creation_from_json(self):
         self.assertEqual(raw_node.name, from_json_node.name)
         self.assertEqual(raw_node.roles, from_json_node.roles)
         self.assertEqual(raw_node.ip, from_json_node.ip)
+
+    @patch("charms.opensearch.v0.helper_cluster.OpenSearchDistribution.request")
+    def test_get_cluster_settings(self, request_mock):
+        """Test the get_cluster_settings method."""
+        request_mock.return_value = {
+            "defaults": {
+                "knn.plugin.enabled": "false",
+            },
+            "persistent": {
+                "knn.plugin.enabled": "true",
+                "cluster.routing.allocation.enable": "all",
+            },
+            "transient": {
+                "indices.recovery.max_bytes_per_sec": "50mb",
+            },
+        }
+
+        expected_settings = {
+            "knn.plugin.enabled": "true",
+            "cluster.routing.allocation.enable": "all",
+            "indices.recovery.max_bytes_per_sec": "50mb",
+        }
+
+        settings = ClusterTopology.get_cluster_settings(
+            self.opensearch,
+            include_defaults=True,
+        )
+
+        self.assertEqual(settings, expected_settings)
+        request_mock.assert_called_once_with(
+            "GET",
+            "/_cluster/settings?flat_settings=true&include_defaults=true",
+            host=None,
+            alt_hosts=None,
+        )
diff --git a/tests/unit/lib/test_ml_plugins.py b/tests/unit/lib/test_ml_plugins.py
index d1aa9e1c23..ac26c77e45 100644
--- a/tests/unit/lib/test_ml_plugins.py
+++ b/tests/unit/lib/test_ml_plugins.py
@@ -54,7 +54,12 @@ def setUp(self) -> None:
         }
         self.charm.opensearch.is_started = MagicMock(return_value=True)
         self.charm.health.apply = MagicMock(return_value=HealthColors.GREEN)
+        self.plugin_manager._is_cluster_ready = MagicMock(return_value=True)
+        charms.opensearch.v0.helper_cluster.ClusterTopology.get_cluster_settings = MagicMock(
+            return_value={}
+        )
 
+    @patch(f"{BASE_LIB_PATH}.opensearch_distro.OpenSearchDistribution.is_node_up")
     @patch(
         f"{BASE_LIB_PATH}.opensearch_peer_clusters.OpenSearchPeerClustersManager.deployment_desc"
     )
@@ -78,6 +83,7 @@ def test_disable_via_config_change(
         mock_version,
         mock_acquire_lock,
         ___,
+        mock_is_node_up,
     ) -> None:
         """Tests entire config_changed event with KNN plugin."""
         mock_status.return_value = PluginState.ENABLED
@@ -89,12 +95,12 @@ def test_disable_via_config_change(
         self.plugin_manager._opensearch_config.delete_plugin = MagicMock()
         self.plugin_manager._opensearch_config.add_plugin = MagicMock()
         self.charm.status = MagicMock()
+        mock_is_node_up.return_value = True
+        self.charm._get_nodes = MagicMock(return_value=[1])
+        self.charm.planned_units = MagicMock(return_value=1)
 
         self.harness.update_config({"plugin_opensearch_knn": False})
         mock_acquire_lock.assert_called_once()
-        self.plugin_manager._opensearch_config.delete_plugin.assert_called_once_with(
-            {"knn.plugin.enabled": True}
-        )
         self.plugin_manager._opensearch_config.add_plugin.assert_called_once_with(
-            {"knn.plugin.enabled": False}
+            {"knn.plugin.enabled": "false"}
         )
diff --git a/tests/unit/lib/test_plugins.py b/tests/unit/lib/test_plugins.py
index d22d83b267..dd9b31242b 100644
--- a/tests/unit/lib/test_plugins.py
+++ b/tests/unit/lib/test_plugins.py
@@ -122,6 +122,7 @@ def setUp(self) -> None:
         self.charm.opensearch.is_started = MagicMock(return_value=True)
         self.charm.health.apply = MagicMock(return_value=HealthColors.GREEN)
         self.charm.opensearch.version = "2.9.0"
+        self.plugin_manager._is_cluster_ready = MagicMock(return_value=True)
 
     @patch("charms.opensearch.v0.opensearch_plugin_manager.OpenSearchPluginManager._is_enabled")
     @patch("charms.opensearch.v0.opensearch_plugin_manager.OpenSearchPluginManager._is_installed")
@@ -242,6 +243,16 @@ def test_reconfigure_and_add_keystore_plugin(
         # Mock _installed_plugins to return test
         mock_installed_plugins.return_value = ["test"]
 
+        self.charm._get_nodes = MagicMock(
+            return_value={
+                "1": {},
+                "2": {},
+                "3": {},
+            }
+        )
+        self.charm.app.planned_units = MagicMock(return_value=3)
+        self.charm.opensearch.is_node_up = MagicMock(return_value=True)
+
         mock_load.return_value = {}
         # run is called, but only _configure method really matter:
         # Set install to false, so only _configure is evaluated
@@ -291,6 +302,16 @@ def test_plugin_setup_with_relation(
         # Return a fake content of the relation
         mock_process_relation.return_value = {"param": "tested"}
 
+        self.charm._get_nodes = MagicMock(
+            return_value={
+                "1": {},
+                "2": {},
+                "3": {},
+            }
+        )
+        self.charm.app.planned_units = MagicMock(return_value=3)
+        self.charm.opensearch.is_node_up = MagicMock(return_value=True)
+
         # Keystore-related mocks
         self.plugin_manager._keystore._add = MagicMock()
         self.plugin_manager._opensearch.request = MagicMock(return_value={"status": 200})
@@ -311,11 +332,13 @@ def test_plugin_setup_with_relation(
         mock_plugin_relation.return_value = True
         # plugin is initially disabled and enabled when method self._disable calls self.status
         mock_is_enabled.side_effect = [
+            False,  # called by logger
             False,  # called by self.status, in self._install
             False,  # called by self._configure
             True,  # called by self.status, in self._disable
+            True,  # called by logger
         ]
-
+        charms.opensearch.v0.opensearch_plugin_manager.logger = MagicMock()
         self.assertTrue(self.plugin_manager.run())
         self.plugin_manager._keystore._add.assert_has_calls([call("key1", "secret1")])
         self.charm.opensearch.config.put.assert_has_calls(
@@ -326,6 +349,7 @@ def test_plugin_setup_with_relation(
             [call("POST", "_nodes/reload_secure_settings")]
         )
 
+    @patch("charms.opensearch.v0.opensearch_plugin_manager.ClusterTopology.get_cluster_settings")
     @patch("charms.opensearch.v0.opensearch_plugin_manager.OpenSearchPluginManager._extra_conf")
     @patch("charms.opensearch.v0.opensearch_plugin_manager.OpenSearchPluginManager._is_enabled")
     @patch(
@@ -334,16 +358,15 @@ def test_plugin_setup_with_relation(
     @patch(
         "charms.opensearch.v0.opensearch_plugin_manager.OpenSearchPluginManager._installed_plugins"
     )
-    @patch("charms.opensearch.v0.opensearch_config.OpenSearchConfig.load_node")
     @patch("charms.opensearch.v0.opensearch_distro.OpenSearchDistribution.version")
     def test_disable_plugin(
         self,
         _,
-        mock_load,
         mock_installed_plugins,
         mock_plugin_relation,
         mock_is_enabled,
-        mock_extra_conf,
+        __,
+        mock_get_cluster_settings,
     ) -> None:
         """Tests end-to-end the disable of a plugin."""
         # Keystore-related mocks
@@ -363,8 +386,17 @@ def test_disable_plugin(
         # Mock _installed_plugins to return test
         mock_installed_plugins.return_value = ["test"]
 
-        # load_node will be called multiple times
-        mock_load.side_effect = {"param": "tested"}
+        self.charm._get_nodes = MagicMock(
+            return_value={
+                "1": {},
+                "2": {},
+                "3": {},
+            }
+        )
+        self.charm.app.planned_units = MagicMock(return_value=3)
+        self.charm.opensearch.is_node_up = MagicMock(return_value=True)
+
+        mock_get_cluster_settings.return_value = {"param": "tested"}
         mock_plugin_relation.return_value = False
         # plugin is initially disabled and enabled when method self._disable calls self.status
         mock_is_enabled.return_value = True
@@ -414,10 +446,6 @@ def test_config_with_valid_keys(self):
             "secret-key": "SECRET_KEY",
         }
         expected_config = OpenSearchPluginConfig(
-            secret_entries_to_del=[
-                "s3.client.default.access_key",
-                "s3.client.default.secret_key",
-            ],
             secret_entries_to_add={
                 "s3.client.default.access_key": "ACCESS_KEY",
                 "s3.client.default.secret_key": "SECRET_KEY",