diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index d78f4dfce..e4f6fe8ed 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -16,7 +16,7 @@ on: jobs: lint: name: Lint - uses: canonical/data-platform-workflows/.github/workflows/lint.yaml@v22.0.0 + uses: canonical/data-platform-workflows/.github/workflows/lint.yaml@v23.0.4 unit-test: name: Unit test charm @@ -24,7 +24,7 @@ jobs: timeout-minutes: 10 steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install tox & poetry run: | pipx install tox @@ -39,7 +39,7 @@ jobs: timeout-minutes: 5 steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 0 - run: | diff --git a/lib/charms/opensearch/v0/opensearch_peer_clusters.py b/lib/charms/opensearch/v0/opensearch_peer_clusters.py index 11fbd281d..7f6ac9443 100644 --- a/lib/charms/opensearch/v0/opensearch_peer_clusters.py +++ b/lib/charms/opensearch/v0/opensearch_peer_clusters.py @@ -2,14 +2,18 @@ # See LICENSE file for licensing details. """Class for Managing simple or large deployments and configuration related changes.""" +import json import logging from datetime import datetime from typing import TYPE_CHECKING, List, Literal, Optional from charms.opensearch.v0.constants_charm import ( + AdminUser, CMRoleRemovalForbidden, CmVoRolesProvidedInvalid, + COSUser, DataRoleRemovalForbidden, + KibanaserverUser, PClusterNoRelation, PClusterWrongNodesCountForQuorum, PClusterWrongRelation, @@ -528,7 +532,7 @@ def rel_data(self) -> Optional[PeerClusterRelData]: if not (data := rel.data[rel.app].get("data")): return None - return PeerClusterRelData.from_str(data) + return self.rel_data_from_str(data) def _pre_validate_roles_change(self, new_roles: List[str], prev_roles: List[str]): """Validate that the config changes of roles are allowed to happen.""" @@ -590,3 +594,61 @@ def _deployment_type(config: PeerClusterConfig, start_mode: StartMode) -> Deploy if not config.init_hold else DeploymentType.FAILOVER_ORCHESTRATOR ) + + def rel_data_from_str(self, redacted_dict_str: str) -> PeerClusterRelData: + """Construct the peer cluster rel data from the secret data.""" + content = json.loads(redacted_dict_str) + credentials = content["credentials"] + + credentials["admin_password"] = ( + self._charm.model.get_secret(id=credentials["admin_password"]) + .get_content() + .get(self._charm.secrets.password_key(AdminUser)) + ) + + credentials["admin_password_hash"] = ( + self._charm.model.get_secret(id=credentials["admin_password_hash"]) + .get_content() + .get(self._charm.secrets.hash_key(AdminUser)) + ) + + credentials["kibana_password"] = ( + self._charm.model.get_secret(id=credentials["kibana_password"]) + .get_content() + .get(self._charm.secrets.password_key(KibanaserverUser)) + ) + + credentials["kibana_password_hash"] = ( + self._charm.model.get_secret(id=credentials["kibana_password_hash"]) + .get_content() + .get(self._charm.secrets.hash_key(KibanaserverUser)) + ) + + if "monitor_password" in credentials: + credentials["monitor_password"] = ( + self._charm.model.get_secret(id=credentials["monitor_password"]) + .get_content() + .get(self._charm.secrets.password_key(COSUser)) + ) + + if "admin_tls" in credentials: + credentials["admin_tls"] = self._charm.model.get_secret( + id=credentials["admin_tls"] + ).get_content() + + if ( + "s3" in credentials + and credentials["s3"].get("access-key") + and credentials["s3"].get("secret-key") + ): + credentials["s3"]["access-key"] = ( + self._charm.model.get_secret(id=credentials["s3"]["access-key"]) + .get_content() + .get("s3-access-key") + ) + credentials["s3"]["secret-key"] = ( + self._charm.model.get_secret(id=credentials["s3"]["secret-key"]) + .get_content() + .get("s3-secret-key") + ) + return PeerClusterRelData.from_dict(content) diff --git a/lib/charms/opensearch/v0/opensearch_relation_peer_cluster.py b/lib/charms/opensearch/v0/opensearch_relation_peer_cluster.py index 39193d694..cd45df636 100644 --- a/lib/charms/opensearch/v0/opensearch_relation_peer_cluster.py +++ b/lib/charms/opensearch/v0/opensearch_relation_peer_cluster.py @@ -4,6 +4,7 @@ """Peer clusters relation related classes for OpenSearch.""" import json import logging +from hashlib import sha1 from typing import TYPE_CHECKING, Any, Dict, List, MutableMapping, Optional, Union from charms.opensearch.v0.constants_charm import ( @@ -75,6 +76,7 @@ def __init__(self, charm: "OpenSearchBaseCharm", relation_name: str): self.relation_name = relation_name self.charm = charm self.peer_cm = charm.opensearch_peer_cm + self.secrets = self.charm.secrets def get_from_rel( self, key: str, rel_id: int = None, remote_app: bool = False @@ -258,8 +260,17 @@ def refresh_relation_data( ) # compute the data that needs to be broadcast to all related clusters (success or error) + # if rel_data is an error, prepare to broadcast it to all related clusters rel_data = self._rel_data(deployment_desc, orchestrators) + # if rel_data is NOT an error, we will replace the plaintext credentials in + # the object, with their corresponding secret IDs + if isinstance(rel_data, PeerClusterRelData): + rel_data_redacted_dict = self._rel_data_redacted_dict(rel_data) + + # grant the secrets inside the rel_data to all the related clusters + self._grant_rel_data_secrets(rel_data_redacted_dict, all_relation_ids) + # exit if current cluster should not have been considered a provider if self._notify_if_wrong_integration(rel_data, all_relation_ids) and event_rel_id: self.delete_from_rel("trigger", rel_id=event_rel_id) @@ -281,9 +292,9 @@ def refresh_relation_data( orchestrators[f"{cluster_type}_app"] = deployment_desc.app.to_dict() self.charm.peers_data.put_object(Scope.APP, "orchestrators", orchestrators) - peer_rel_data_key, should_defer = "data", False + should_defer = False if isinstance(rel_data, PeerClusterRelErrorData): - peer_rel_data_key, should_defer = "error_data", rel_data.should_wait + should_defer = rel_data.should_wait # save the orchestrators of this fleet for rel_id in all_relation_ids: @@ -299,9 +310,19 @@ def refresh_relation_data( # there is no error to broadcast - we clear any previously broadcasted error if isinstance(rel_data, PeerClusterRelData): self.delete_from_rel("error_data", rel_id=rel_id) - - # are we potentially overriding stuff here? - self.put_in_rel(data={peer_rel_data_key: rel_data.to_str()}, rel_id=rel_id) + # we add the hash of the rel_data to only emit a change event + # if the data has actually changed + self.put_in_rel( + data={ + "data": json.dumps(rel_data_redacted_dict), + "rel_data_hash": sha1( + json.dumps(rel_data.to_dict(), sort_keys=True).encode() + ).hexdigest(), + }, + rel_id=rel_id, + ) + else: + self.put_in_rel(data={"error_data": rel_data.to_str()}, rel_id=rel_id) if can_defer and should_defer: event.defer() @@ -376,18 +397,26 @@ def _s3_credentials( return None # As the main orchestrator, this application must set the S3 information. + access_key = self.charm.backup.s3_client.get_s3_connection_info().get("access-key") + secret_key = self.charm.backup.s3_client.get_s3_connection_info().get("secret-key") + + # set the secrets in the charm + # TODO Move this to s3 relation and include both in one secret + self.charm.secrets.put(Scope.APP, "s3-access-key", access_key) + self.charm.secrets.put(Scope.APP, "s3-secret-key", secret_key) + return S3RelDataCredentials( - access_key=self.charm.backup.s3_client.get_s3_connection_info().get("access-key"), - secret_key=self.charm.backup.s3_client.get_s3_connection_info().get("secret-key"), + access_key=access_key, + secret_key=secret_key, ) - if not self.charm.secrets.get(Scope.APP, "access-key"): + if not self.charm.secrets.get(Scope.APP, "s3-access-key"): return None # Return what we have received from the peer relation return S3RelDataCredentials( - access_key=self.charm.secrets.get(Scope.APP, "access-key"), - secret_key=self.charm.secrets.get(Scope.APP, "secret-key"), + access_key=self.charm.secrets.get(Scope.APP, "s3-access-key"), + secret_key=self.charm.secrets.get(Scope.APP, "s3-secret-key"), ) def _rel_data( @@ -401,20 +430,27 @@ def _rel_data( # peer rel data for requirers to show a blocked status until it's fully # ready (will receive a subsequent try: - secrets = self.charm.secrets return PeerClusterRelData( cluster_name=deployment_desc.config.cluster_name, cm_nodes=self._fetch_local_cm_nodes(deployment_desc), credentials=PeerClusterRelDataCredentials( admin_username=AdminUser, - admin_password=secrets.get(Scope.APP, secrets.password_key(AdminUser)), - admin_password_hash=secrets.get(Scope.APP, secrets.hash_key(AdminUser)), - kibana_password=secrets.get(Scope.APP, secrets.password_key(KibanaserverUser)), - kibana_password_hash=secrets.get( - Scope.APP, secrets.hash_key(KibanaserverUser) + admin_password=self.secrets.get( + Scope.APP, self.secrets.password_key(AdminUser) ), - monitor_password=secrets.get(Scope.APP, secrets.password_key(COSUser)), - admin_tls=secrets.get_object(Scope.APP, CertType.APP_ADMIN.val), + admin_password_hash=self.secrets.get( + Scope.APP, self.secrets.hash_key(AdminUser) + ), + kibana_password=self.secrets.get( + Scope.APP, self.secrets.password_key(KibanaserverUser) + ), + kibana_password_hash=self.secrets.get( + Scope.APP, self.secrets.hash_key(KibanaserverUser) + ), + monitor_password=self.secrets.get( + Scope.APP, self.secrets.password_key(COSUser) + ), + admin_tls=self.secrets.get_object(Scope.APP, CertType.APP_ADMIN.val), s3=self._s3_credentials(deployment_desc), ), deployment_desc=deployment_desc, @@ -519,6 +555,72 @@ def _fetch_local_cm_nodes(self, deployment_desc: DeploymentDescription) -> List[ if node.is_cm_eligible() and node.app.id == deployment_desc.app.id ] + def _rel_data_redacted_dict(self, rel_data: PeerClusterRelData) -> dict[str, Any]: + """Replace the secrets' plain text content in the rel data by their IDs.""" + # hide the secrets and instead pass their ids so that + # they can be fetched when needed in the requirer side + redacted_dict = rel_data.to_dict() + + redacted_dict["credentials"] = { + "admin_username": AdminUser, + "admin_password": self.secrets.get_secret_id( + Scope.APP, self.secrets.password_key(AdminUser) + ), + "admin_password_hash": self.secrets.get_secret_id( + Scope.APP, self.secrets.hash_key(AdminUser) + ), + "kibana_password": self.secrets.get_secret_id( + Scope.APP, self.secrets.password_key(KibanaserverUser) + ), + "kibana_password_hash": self.secrets.get_secret_id( + Scope.APP, self.secrets.hash_key(KibanaserverUser) + ), + } + + if monitor_password := self.secrets.get_secret_id( + Scope.APP, self.secrets.password_key(COSUser) + ): + redacted_dict["credentials"]["monitor_password"] = monitor_password + if admin_tls := self.secrets.get_secret_id(Scope.APP, CertType.APP_ADMIN.val): + redacted_dict["credentials"]["admin_tls"] = admin_tls + + if ( + rel_data.credentials.s3 + and rel_data.credentials.s3.access_key + and rel_data.credentials.s3.secret_key + ): + # TODO Move this to s3 relation and include both in one secret + redacted_dict["credentials"]["s3"] = { + "access-key": self.secrets.get_secret_id(Scope.APP, "s3-access-key"), + "secret-key": self.secrets.get_secret_id(Scope.APP, "s3-secret-key"), + } + + return redacted_dict + + def _grant_rel_data_secrets( + self, rel_data_secret_content: dict[str, Any], all_rel_ids: list[int] + ): + """Grant the secrets to all the related apps.""" + credentials = rel_data_secret_content["credentials"] + for key, secret_id in credentials.items(): + # admin-username is not secrets + if key == "admin_username": + continue + + for rel_id in all_rel_ids: + if relation := self.get_rel(rel_id=rel_id): + if key == "s3": + if secret_id["access-key"]: + self.secrets.grant_secret_to_relation( + secret_id["access-key"], relation + ) + if secret_id["secret-key"]: + self.secrets.grant_secret_to_relation( + secret_id["secret-key"], relation + ) + else: + self.secrets.grant_secret_to_relation(secret_id, relation) + class OpenSearchPeerClusterRequirer(OpenSearchPeerClusterRelation): """Peer cluster relation requirer class.""" @@ -573,8 +675,7 @@ def _on_peer_cluster_relation_changed(self, event: RelationChangedEvent): # noq return # fetch the success data - data = PeerClusterRelData.from_str(data["data"]) - + data = self.peer_cm.rel_data_from_str(data["data"]) # check errors that can only be figured out from the requirer side if self._error_set_from_requirer(orchestrators, deployment_desc, data, event.relation.id): return @@ -619,18 +720,27 @@ def _on_peer_cluster_relation_changed(self, event: RelationChangedEvent): # noq def _set_security_conf(self, data: PeerClusterRelData) -> None: """Store security related config.""" # set admin secrets - secrets = self.charm.secrets - secrets.put(Scope.APP, secrets.password_key(AdminUser), data.credentials.admin_password) - secrets.put(Scope.APP, secrets.hash_key(AdminUser), data.credentials.admin_password_hash) - secrets.put( - Scope.APP, secrets.password_key(KibanaserverUser), data.credentials.kibana_password + self.secrets.put( + Scope.APP, self.secrets.password_key(AdminUser), data.credentials.admin_password + ) + self.secrets.put( + Scope.APP, self.secrets.hash_key(AdminUser), data.credentials.admin_password_hash + ) + self.secrets.put( + Scope.APP, + self.secrets.password_key(KibanaserverUser), + data.credentials.kibana_password, + ) + self.secrets.put( + Scope.APP, + self.secrets.hash_key(KibanaserverUser), + data.credentials.kibana_password_hash, ) - secrets.put( - Scope.APP, secrets.hash_key(KibanaserverUser), data.credentials.kibana_password_hash + self.secrets.put( + Scope.APP, self.secrets.password_key(COSUser), data.credentials.monitor_password ) - secrets.put(Scope.APP, secrets.password_key(COSUser), data.credentials.monitor_password) - secrets.put_object(Scope.APP, CertType.APP_ADMIN.val, data.credentials.admin_tls) + self.secrets.put_object(Scope.APP, CertType.APP_ADMIN.val, data.credentials.admin_tls) # store the app admin TLS resources if not stored self.charm.tls.store_new_tls_resources(CertType.APP_ADMIN, data.credentials.admin_tls) @@ -806,11 +916,11 @@ def _cm_nodes(self, orchestrators: PeerClusterOrchestrators) -> List[Node]: if rel_id == -1: continue - data = self.get_obj_from_rel(key="data", rel_id=rel_id) + data = self.get_from_rel(key="data", rel_id=rel_id, remote_app=True) if not data: # not ready yet continue - data = PeerClusterRelData.from_dict(data) + data = self.peer_cm.rel_data_from_str(data) cm_nodes = {**cm_nodes, **{node.name: node for node in data.cm_nodes}} # attempt to have an opensearch reported list of CMs - the response @@ -847,7 +957,7 @@ def _error_set_from_providers( error = None for rel_id in orchestrator_rel_ids: - data = self.get_obj_from_rel("data", rel_id=rel_id) + data = self.get_from_rel("data", rel_id=rel_id, remote_app=True) error_data = self.get_obj_from_rel("error_data", rel_id=rel_id) if not data and not error_data: # relation data still incomplete return True diff --git a/lib/charms/opensearch/v0/opensearch_secrets.py b/lib/charms/opensearch/v0/opensearch_secrets.py index c36e7b9f6..d65895238 100644 --- a/lib/charms/opensearch/v0/opensearch_secrets.py +++ b/lib/charms/opensearch/v0/opensearch_secrets.py @@ -27,7 +27,7 @@ Scope, SecretCache, ) -from ops import JujuVersion, Secret, SecretNotFoundError +from ops import JujuVersion, Relation, Secret, SecretNotFoundError from ops.charm import SecretChangedEvent from ops.framework import Object from overrides import override @@ -368,3 +368,13 @@ def delete(self, scope: Scope, key: str) -> None: self._remove_juju_secret(scope, key) logging.debug(f"Deleted secret {scope}:{key}") + + def get_secret_id(self, scope: Scope, key: str) -> Optional[str]: + """Get the secret ID from the cache.""" + label = self.label(scope, key) + return self._charm.peers_data.get(scope, label) + + def grant_secret_to_relation(self, secret_id: int, relation: Relation): + """Grant a secret to a relation.""" + secret = self._charm.model.get_secret(id=secret_id) + secret.grant(relation) diff --git a/tests/unit/lib/test_opensearch_secrets.py b/tests/unit/lib/test_opensearch_secrets.py index d84b559df..402686f0e 100644 --- a/tests/unit/lib/test_opensearch_secrets.py +++ b/tests/unit/lib/test_opensearch_secrets.py @@ -185,3 +185,15 @@ def test_bad_label(self): @parameterized.expand([Scope.APP, Scope.UNIT]) def test_put_and_get_complex_obj(self, scope): return + + def test_get_secret_id(self): + # add a secret to the store + content = {"secret": "value"} + self.store.put(Scope.APP, "super-secret-key", content) + # get the secret id + secret_id = self.store.get_secret_id(Scope.APP, "super-secret-key") + self.assertIsNotNone(secret_id) + # check the secret content + secret = self.charm.model.get_secret(id=secret_id) + secret_content = secret.get_content() + self.assertDictEqual(secret_content, {"super-secret-key": str(content)})