Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DPE-3654] Peer cluster data stored in a secret #463

Open
wants to merge 15 commits into
base: 2/edge
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,15 @@ on:
jobs:
lint:
name: Lint
uses: canonical/data-platform-workflows/.github/workflows/lint.yaml@v22.0.0
uses: canonical/data-platform-workflows/.github/workflows/lint.yaml@v23.0.4

unit-test:
name: Unit test charm
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Install tox & poetry
run: |
pipx install tox
Expand All @@ -39,7 +39,7 @@ jobs:
timeout-minutes: 5
steps:
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 0
- run: |
Expand Down
66 changes: 64 additions & 2 deletions lib/charms/opensearch/v0/opensearch_peer_clusters.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,18 @@
# See LICENSE file for licensing details.

"""Class for Managing simple or large deployments and configuration related changes."""
import json
import logging
from datetime import datetime
from typing import TYPE_CHECKING, List, Literal, Optional

from charms.opensearch.v0.constants_charm import (
AdminUser,
CMRoleRemovalForbidden,
CmVoRolesProvidedInvalid,
COSUser,
DataRoleRemovalForbidden,
KibanaserverUser,
PClusterNoRelation,
PClusterWrongNodesCountForQuorum,
PClusterWrongRelation,
Expand Down Expand Up @@ -525,10 +529,10 @@ def rel_data(self) -> Optional[PeerClusterRelData]:
rel = self._charm.model.get_relation(
PeerClusterOrchestratorRelationName, orchestrators.main_rel_id
)
if not (data := rel.data[rel.app].get("data")):
if not (redacted_dict_str := rel.data[rel.app].get("data")):
skourta marked this conversation as resolved.
Show resolved Hide resolved
return None

return PeerClusterRelData.from_str(data)
return self.rel_data_from_redacted_str(redacted_dict_str)

def _pre_validate_roles_change(self, new_roles: List[str], prev_roles: List[str]):
"""Validate that the config changes of roles are allowed to happen."""
Expand Down Expand Up @@ -590,3 +594,61 @@ def _deployment_type(config: PeerClusterConfig, start_mode: StartMode) -> Deploy
if not config.init_hold
else DeploymentType.FAILOVER_ORCHESTRATOR
)

def rel_data_from_redacted_str(self, redacted_dict_str: str) -> PeerClusterRelData | None:
skourta marked this conversation as resolved.
Show resolved Hide resolved
"""Construct the peer cluster rel data from the secret data."""
content = json.loads(redacted_dict_str)
credentials = content["credentials"]

credentials["admin_password"] = (
self._charm.model.get_secret(id=credentials["admin_password"])
.get_content()
.get(self._charm.secrets.password_key(AdminUser))
)

credentials["admin_password_hash"] = (
self._charm.model.get_secret(id=credentials["admin_password_hash"])
.get_content()
.get(self._charm.secrets.hash_key(AdminUser))
)

credentials["kibana_password"] = (
self._charm.model.get_secret(id=credentials["kibana_password"])
.get_content()
.get(self._charm.secrets.password_key(KibanaserverUser))
)

credentials["kibana_password_hash"] = (
self._charm.model.get_secret(id=credentials["kibana_password_hash"])
.get_content()
.get(self._charm.secrets.hash_key(KibanaserverUser))
)

if "monitor_password" in credentials:
credentials["monitor_password"] = (
self._charm.model.get_secret(id=credentials["monitor_password"])
.get_content()
.get(self._charm.secrets.password_key(COSUser))
)

if "admin_tls" in credentials:
credentials["admin_tls"] = self._charm.model.get_secret(
id=credentials["admin_tls"]
).get_content()

if (
"s3" in credentials
and credentials["s3"].get("access-key")
and credentials["s3"].get("secret-key")
):
credentials["s3"]["access-key"] = (
self._charm.model.get_secret(id=credentials["s3"]["access-key"])
.get_content()
.get("s3-access-key")
)
credentials["s3"]["secret-key"] = (
self._charm.model.get_secret(id=credentials["s3"]["secret-key"])
.get_content()
.get("s3-secret-key")
)
return PeerClusterRelData.from_dict(content)
173 changes: 140 additions & 33 deletions lib/charms/opensearch/v0/opensearch_relation_peer_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"""Peer clusters relation related classes for OpenSearch."""
import json
import logging
from hashlib import sha1
from typing import TYPE_CHECKING, Any, Dict, List, MutableMapping, Optional, Union

from charms.opensearch.v0.constants_charm import (
Expand Down Expand Up @@ -75,6 +76,7 @@ def __init__(self, charm: "OpenSearchBaseCharm", relation_name: str):
self.relation_name = relation_name
self.charm = charm
self.peer_cm = charm.opensearch_peer_cm
self.secrets = self.charm.secrets

def get_from_rel(
self, key: str, rel_id: int = None, remote_app: bool = False
Expand Down Expand Up @@ -260,6 +262,13 @@ def refresh_relation_data(
# compute the data that needs to be broadcast to all related clusters (success or error)
skourta marked this conversation as resolved.
Show resolved Hide resolved
rel_data = self._rel_data(deployment_desc, orchestrators)

# if rel_data is an error, prepare to broadcast it to all related clusters
skourta marked this conversation as resolved.
Show resolved Hide resolved
if isinstance(rel_data, PeerClusterRelData):
rel_data_redacted_dict = self._rel_data_redacted_dict(rel_data)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is it needed to serialize into a dict?


# grant the secrets inside the rel_data to all the related clusters
self._grant_rel_data_secrets(rel_data_redacted_dict, all_relation_ids)

# exit if current cluster should not have been considered a provider
if self._notify_if_wrong_integration(rel_data, all_relation_ids) and event_rel_id:
self.delete_from_rel("trigger", rel_id=event_rel_id)
Expand All @@ -281,9 +290,9 @@ def refresh_relation_data(
orchestrators[f"{cluster_type}_app"] = deployment_desc.app.to_dict()
self.charm.peers_data.put_object(Scope.APP, "orchestrators", orchestrators)

peer_rel_data_key, should_defer = "data", False
should_defer = False
if isinstance(rel_data, PeerClusterRelErrorData):
peer_rel_data_key, should_defer = "error_data", rel_data.should_wait
should_defer = rel_data.should_wait

# save the orchestrators of this fleet
for rel_id in all_relation_ids:
Expand All @@ -299,9 +308,19 @@ def refresh_relation_data(
# there is no error to broadcast - we clear any previously broadcasted error
if isinstance(rel_data, PeerClusterRelData):
self.delete_from_rel("error_data", rel_id=rel_id)

# are we potentially overriding stuff here?
self.put_in_rel(data={peer_rel_data_key: rel_data.to_str()}, rel_id=rel_id)
# we add the hash of the rel_data to only emit a change event
# if the data has actually changed
self.put_in_rel(
data={
"data": json.dumps(rel_data_redacted_dict),
"rel_data_hash": sha1(
json.dumps(rel_data.to_dict(), sort_keys=True).encode()
).hexdigest(),
},
rel_id=rel_id,
)
else:
self.put_in_rel(data={"error_data": rel_data.to_str()}, rel_id=rel_id)

if can_defer and should_defer:
event.defer()
Expand Down Expand Up @@ -376,18 +395,25 @@ def _s3_credentials(
return None

# As the main orchestrator, this application must set the S3 information.
access_key = self.charm.backup.s3_client.get_s3_connection_info().get("access-key")
secret_key = self.charm.backup.s3_client.get_s3_connection_info().get("secret-key")

# set the secrets in the charm
self.charm.secrets.put(Scope.APP, "s3-access-key", access_key)
self.charm.secrets.put(Scope.APP, "s3-secret-key", secret_key)
skourta marked this conversation as resolved.
Show resolved Hide resolved

return S3RelDataCredentials(
access_key=self.charm.backup.s3_client.get_s3_connection_info().get("access-key"),
secret_key=self.charm.backup.s3_client.get_s3_connection_info().get("secret-key"),
access_key=access_key,
secret_key=secret_key,
)

if not self.charm.secrets.get(Scope.APP, "access-key"):
if not self.charm.secrets.get(Scope.APP, "s3-access-key"):
return None

# Return what we have received from the peer relation
return S3RelDataCredentials(
access_key=self.charm.secrets.get(Scope.APP, "access-key"),
secret_key=self.charm.secrets.get(Scope.APP, "secret-key"),
access_key=self.charm.secrets.get(Scope.APP, "s3-access-key"),
secret_key=self.charm.secrets.get(Scope.APP, "s3-secret-key"),
)

def _rel_data(
Expand All @@ -401,20 +427,27 @@ def _rel_data(
# peer rel data for requirers to show a blocked status until it's fully
# ready (will receive a subsequent
try:
secrets = self.charm.secrets
return PeerClusterRelData(
cluster_name=deployment_desc.config.cluster_name,
cm_nodes=self._fetch_local_cm_nodes(deployment_desc),
credentials=PeerClusterRelDataCredentials(
admin_username=AdminUser,
admin_password=secrets.get(Scope.APP, secrets.password_key(AdminUser)),
admin_password_hash=secrets.get(Scope.APP, secrets.hash_key(AdminUser)),
kibana_password=secrets.get(Scope.APP, secrets.password_key(KibanaserverUser)),
kibana_password_hash=secrets.get(
Scope.APP, secrets.hash_key(KibanaserverUser)
admin_password=self.secrets.get(
Scope.APP, self.secrets.password_key(AdminUser)
),
monitor_password=secrets.get(Scope.APP, secrets.password_key(COSUser)),
admin_tls=secrets.get_object(Scope.APP, CertType.APP_ADMIN.val),
admin_password_hash=self.secrets.get(
Scope.APP, self.secrets.hash_key(AdminUser)
),
kibana_password=self.secrets.get(
Scope.APP, self.secrets.password_key(KibanaserverUser)
),
kibana_password_hash=self.secrets.get(
Scope.APP, self.secrets.hash_key(KibanaserverUser)
),
monitor_password=self.secrets.get(
Scope.APP, self.secrets.password_key(COSUser)
),
admin_tls=self.secrets.get_object(Scope.APP, CertType.APP_ADMIN.val),
s3=self._s3_credentials(deployment_desc),
),
deployment_desc=deployment_desc,
Expand Down Expand Up @@ -519,6 +552,72 @@ def _fetch_local_cm_nodes(self, deployment_desc: DeploymentDescription) -> List[
if node.is_cm_eligible() and node.app.id == deployment_desc.app.id
]

def _rel_data_redacted_dict(self, rel_data: PeerClusterRelData) -> dict[str, Any]:
"""Convert the secret data to a dict for storage in a secret."""
skourta marked this conversation as resolved.
Show resolved Hide resolved
# If we use the values of the secrets then the size of the secret
# would be too large for juju we store the secret ids instead and
# fetch the secrets when needed in the requirer side
skourta marked this conversation as resolved.
Show resolved Hide resolved
redacted_dict = rel_data.to_dict()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is needed to serialize to a dict, why not work with the object and its attributes directly? I would rather we work with the former.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We do not want to mutate the object directly. We can either use the dict or copy the object and update the content of the secret to their respective ids.

Copy link
Contributor Author

@skourta skourta Nov 25, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Mehdi-Bendriss We cannot directly use the PeerClusterRelData object directly because of the admin_tls field in the credentials which is dict. The secret IDs are str. I can either create a new model for the redacted peer cluster relation data or stick with the dict. WDYT?


redacted_dict["credentials"] = {
"admin_username": AdminUser,
"admin_password": self.secrets.get_secret_id(
Scope.APP, self.secrets.password_key(AdminUser)
),
"admin_password_hash": self.secrets.get_secret_id(
Scope.APP, self.secrets.hash_key(AdminUser)
),
"kibana_password": self.secrets.get_secret_id(
Scope.APP, self.secrets.password_key(KibanaserverUser)
),
"kibana_password_hash": self.secrets.get_secret_id(
Scope.APP, self.secrets.hash_key(KibanaserverUser)
),
}

if monitor_password := self.secrets.get_secret_id(
Scope.APP, self.secrets.password_key(COSUser)
):
redacted_dict["credentials"]["monitor_password"] = monitor_password
if admin_tls := self.secrets.get_secret_id(Scope.APP, CertType.APP_ADMIN.val):
redacted_dict["credentials"]["admin_tls"] = admin_tls

if (
rel_data.credentials.s3
and rel_data.credentials.s3.access_key
and rel_data.credentials.s3.secret_key
):
redacted_dict["credentials"]["s3"] = {
"access-key": self.secrets.get_secret_id(Scope.APP, "s3-access-key"),
"secret-key": self.secrets.get_secret_id(Scope.APP, "s3-secret-key"),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Likewise, this secret should be caught straight from the s3 relation, once it supports granting secrets around.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and it should be only one s3 secret, not 2 like we currently do

}

return redacted_dict

def _grant_rel_data_secrets(
self, rel_data_secret_content: dict[str, Any], all_rel_ids: list[int]
):
"""Grant the secrets to all the related apps."""
credentials = rel_data_secret_content["credentials"]
for key, secret_id in credentials.items():
# admin-username is not secrets
if key == "admin_username":
continue

for rel_id in all_rel_ids:
if relation := self.get_rel(rel_id=rel_id):
if key == "s3":
if secret_id["access-key"]:
self.secrets.grant_secret_to_relation(
secret_id["access-key"], relation
)
if secret_id["secret-key"]:
self.secrets.grant_secret_to_relation(
secret_id["secret-key"], relation
)
else:
self.secrets.grant_secret_to_relation(secret_id, relation)


class OpenSearchPeerClusterRequirer(OpenSearchPeerClusterRelation):
"""Peer cluster relation requirer class."""
Expand Down Expand Up @@ -573,8 +672,7 @@ def _on_peer_cluster_relation_changed(self, event: RelationChangedEvent): # noq
return

# fetch the success data
data = PeerClusterRelData.from_str(data["data"])

data = self.peer_cm.rel_data_from_redacted_str(data["data"])
# check errors that can only be figured out from the requirer side
if self._error_set_from_requirer(orchestrators, deployment_desc, data, event.relation.id):
return
Expand Down Expand Up @@ -619,18 +717,27 @@ def _on_peer_cluster_relation_changed(self, event: RelationChangedEvent): # noq
def _set_security_conf(self, data: PeerClusterRelData) -> None:
"""Store security related config."""
# set admin secrets
secrets = self.charm.secrets
secrets.put(Scope.APP, secrets.password_key(AdminUser), data.credentials.admin_password)
secrets.put(Scope.APP, secrets.hash_key(AdminUser), data.credentials.admin_password_hash)
secrets.put(
Scope.APP, secrets.password_key(KibanaserverUser), data.credentials.kibana_password
self.secrets.put(
Scope.APP, self.secrets.password_key(AdminUser), data.credentials.admin_password
)
self.secrets.put(
Scope.APP, self.secrets.hash_key(AdminUser), data.credentials.admin_password_hash
)
self.secrets.put(
Scope.APP,
self.secrets.password_key(KibanaserverUser),
data.credentials.kibana_password,
)
self.secrets.put(
Scope.APP,
self.secrets.hash_key(KibanaserverUser),
data.credentials.kibana_password_hash,
)
secrets.put(
Scope.APP, secrets.hash_key(KibanaserverUser), data.credentials.kibana_password_hash
self.secrets.put(
Scope.APP, self.secrets.password_key(COSUser), data.credentials.monitor_password
)
secrets.put(Scope.APP, secrets.password_key(COSUser), data.credentials.monitor_password)

secrets.put_object(Scope.APP, CertType.APP_ADMIN.val, data.credentials.admin_tls)
self.secrets.put_object(Scope.APP, CertType.APP_ADMIN.val, data.credentials.admin_tls)

# store the app admin TLS resources if not stored
self.charm.tls.store_new_tls_resources(CertType.APP_ADMIN, data.credentials.admin_tls)
Expand Down Expand Up @@ -806,11 +913,11 @@ def _cm_nodes(self, orchestrators: PeerClusterOrchestrators) -> List[Node]:
if rel_id == -1:
continue

data = self.get_obj_from_rel(key="data", rel_id=rel_id)
if not data: # not ready yet
redacted_dict_str = self.get_from_rel(key="data", rel_id=rel_id, remote_app=True)
if not redacted_dict_str: # not ready yet
continue

data = PeerClusterRelData.from_dict(data)
data = self.peer_cm.rel_data_from_redacted_str(redacted_dict_str)
cm_nodes = {**cm_nodes, **{node.name: node for node in data.cm_nodes}}

# attempt to have an opensearch reported list of CMs - the response
Expand Down Expand Up @@ -847,7 +954,7 @@ def _error_set_from_providers(

error = None
for rel_id in orchestrator_rel_ids:
data = self.get_obj_from_rel("data", rel_id=rel_id)
data = self.get_from_rel("data", rel_id=rel_id, remote_app=True)
error_data = self.get_obj_from_rel("error_data", rel_id=rel_id)
if not data and not error_data: # relation data still incomplete
return True
Expand Down
Loading
Loading