diff --git a/docs/jumpstart/kickoff.mdx b/docs/jumpstart/kickoff.mdx index 1acdff7a0..b614c4232 100644 --- a/docs/jumpstart/kickoff.mdx +++ b/docs/jumpstart/kickoff.mdx @@ -4,27 +4,37 @@ sidebar_label: "Kick Off" sidebar_position: 2 describe: Set project expectations for Jumpstarts with Cloud Posse --- -import Link from '@docusaurus/Link' -import KeyPoints from '@site/src/components/KeyPoints' -import Steps from '@site/src/components/Steps' -import Step from '@site/src/components/Step' -import StepNumber from '@site/src/components/StepNumber' -import Intro from '@site/src/components/Intro' -import ActionCard from '@site/src/components/ActionCard' -import PrimaryCTA from '@site/src/components/PrimaryCTA' -import TaskList from '@site/src/components/TaskList' -import Admonition from '@theme/Admonition'; -import Tabs from '@theme/Tabs'; -import TabItem from '@theme/TabItem'; + +import Link from "@docusaurus/Link"; +import KeyPoints from "@site/src/components/KeyPoints"; +import Steps from "@site/src/components/Steps"; +import Step from "@site/src/components/Step"; +import StepNumber from "@site/src/components/StepNumber"; +import Intro from "@site/src/components/Intro"; +import ActionCard from "@site/src/components/ActionCard"; +import PrimaryCTA from "@site/src/components/PrimaryCTA"; +import TaskList from "@site/src/components/TaskList"; +import Admonition from "@theme/Admonition"; +import Tabs from "@theme/Tabs"; +import TabItem from "@theme/TabItem"; -The kickoff process for Jumpstart engagements with Cloud Posse ensures a smooth start with clear communication for a successful project delivery. During the call, we will confirm contract requirements and set project expectations. We also cover how Cloud Posse will deploy your infrastructure in layers using our reference architecture and introduce various support options, including Slack channels, customer workshops, and office hours. 
+ The kickoff process for Jumpstart engagements with Cloud Posse ensures a + smooth start with clear communication for a successful project delivery. + During the call, we will confirm contract requirements and set project + expectations. We also cover how Cloud Posse will deploy your infrastructure in + layers using our reference architecture and introduce various support options, + including Slack channels, customer workshops, and office hours. -- **Kickoff Process:** Establish roles, confirm requirements, and set project expectations for a smooth start -- **Implementation Phase:** Understand how we go about provisioning infrastructure in layers using the reference architecture, including comprehensive handoffs with documentation -- **Support and Communication:** Review the multiple channels for support, including Slack, customer workshops, office hours, and detailed documentation to ensure successful engagement + - **Kickoff Process:** Establish roles, confirm requirements, and set project + expectations for a smooth start + - **Implementation Phase:** Understand how we go about provisioning + infrastructure in layers using the reference architecture, including + comprehensive handoffs with documentation + - **Support and Communication:** Review the multiple channels for support, including Slack, customer workshops, + office hours, and detailed documentation to ensure successful engagement ## Preparing for the Kickoff Meeting @@ -32,9 +42,10 @@ The kickoff process for Jumpstart engagements with Cloud Posse ensures a smooth This document outlines what to expect from your first call with Cloud Posse. In order to make the most of this meeting, please read through this document and come prepared with questions. In particular, please review the following: -1. Identify stakeholders and establish ownership of the engagement within your Organization. -2. Read through the [Design Decisions](#review-design-decisions) and prepare questions and decisions. -3. 
Review the list of [Actions Items](#action-items) following this call. + 1. Identify stakeholders and establish ownership of the engagement within your + Organization. + 2. Read through the [Design Decisions](#review-design-decisions) and prepare questions and decisions. + 3. Review the list of [Action Items](#action-items) following this call. ## Kickoff Meeting Agenda @@ -44,6 +55,7 @@ This document outlines what to expect from your first call with Cloud Posse. In ### Introductions Here we will review who is on the call, what their roles are, and identify our technical point of contact at Cloud Posse. We will also review the working timezones of the teams. + @@ -58,6 +70,7 @@ This document outlines what to expect from your first call with Cloud Posse. In If you come prepared for Hand-Off calls, we can skip the lecture and spend more time answering questions or working through hands-on labs. + @@ -82,6 +95,7 @@ This document outlines what to expect from your first call with Cloud Posse. In This is a great opportunity to get your questions answered and to get help with your project. + @@ -94,18 +108,21 @@ This document outlines what to expect from your first call with Cloud Posse. In This is a good way to keep up with the latest developments and trends in the DevOps community. Sign up at [cloudposse.com/office-hours](https://cloudposse.com/office-hours/) + ### SweetOps Slack If you are looking for a community of like-minded DevOps practitioners, please join the [SweetOps Slack](https://slack.sweetops.com/). 
+ ### Review Design Decisions + - [ ] [Decide on Terraform Version](/layers/project/design-decisions/decide-on-terraform-version) - [ ] [Decide on Namespace Abbreviation](/layers/project/design-decisions/decide-on-namespace-abbreviation) - [ ] [Decide on Infrastructure Repository Name](/layers/project/design-decisions/decide-on-infrastructure-repository-name) - [ ] [Decide on Email Address Format for AWS Accounts](/layers/accounts/design-decisions/decide-on-email-address-format-for-aws-accounts) @@ -117,6 +134,11 @@ This document outlines what to expect from your first call with Cloud Posse. In - [ ] [Decide on Vanity Domain](/layers/network/design-decisions/decide-on-vanity-branded-domain) - [ ] [Decide on Release Engineering Strategy](/layers/software-delivery/design-decisions/decide-on-release-engineering-strategy) + + + These are the design decisions you can customize as part of the Jumpstart package. [All other decisions are pre-made](/tags/design-decision/) for you, but you’re welcome to review them. If you’d like to make additional changes, [let us know—we’re happy to provide a quote](https://cloudposse.com/meet). + + @@ -136,10 +158,10 @@ This document outlines what to expect from your first call with Cloud Posse. In - [Release Engineering](/layers/software-delivery) - Final Call (Sign-off) + - ## How to Succeed Cloud Posse has noticed several patterns that lead to successful projects. @@ -149,24 +171,28 @@ Cloud Posse has noticed several patterns that lead to successful projects. ### Come Prepared Review six pagers and documentation before Hand-Off calls. This will help you to know what questions need to be asked. Coming unprepared will lead to a lot of questions and back-and-forth. This will slow down material resulting in less time for new material. + ### Take Initiative The most successful customers take initiative to make customizations to their Reference Architecture. This is a great way to make the Reference Architecture your own. 
It also helps to build a deeper understanding of the Reference Architecture and how it works. + ### Cameras On We recommend that all participants have their cameras on. This helps to build trust and rapport. It also helps to keep everyone engaged and focused. This also lets us gage how everyone is understanding the material. If you are having trouble understanding something, please ask questions. + ### Ask Questions We encourage you to ask questions. We want to make sure that everyone understands the material. We also want to make sure that we are providing the right level of detail. Our meetings are intended to be interactive and encourage conversation. Please feel free to interject at any time if you have a question or a comment to add to the discussion. + @@ -187,4 +213,3 @@ Both the [Shared Customer Workshops](#shared-customer-workshop) and [Community O ### Documentation You can always find how-to guides, design decisions, and other helpful pages at [docs.cloudposse.com](/) - diff --git a/docs/layers/accounts/design-decisions/decide-on-aws-account-flavors-and-organizational-units.mdx b/docs/layers/accounts/design-decisions/decide-on-aws-account-flavors-and-organizational-units.mdx index ce4e4860c..d2a66bafe 100644 --- a/docs/layers/accounts/design-decisions/decide-on-aws-account-flavors-and-organizational-units.mdx +++ b/docs/layers/accounts/design-decisions/decide-on-aws-account-flavors-and-organizational-units.mdx @@ -4,12 +4,19 @@ sidebar_label: "AWS Accounts and OUs" sidebar_position: 2 description: "Decide how to organize workloads for isolation and management" refarch_id: REFARCH-55 +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; - When setting up your AWS infrastructure, you need to decide how to organize your workloads across multiple AWS accounts to ensure 
optimal isolation and management. This involves deciding the appropriate account structure and organizational units (OUs) that align with your operational needs and security requirements. + When setting up your AWS infrastructure, you need to decide how to organize + your workloads across multiple AWS accounts to ensure optimal isolation and + management. This involves deciding the appropriate account structure and + organizational units (OUs) that align with your operational needs and security + requirements. ## Context and Problem Statement @@ -58,16 +65,21 @@ It is advised to keep the names of accounts as short as possible because of reso

- The "root" (parent, billing) account creates all child accounts. The root account has special capabilities not - found in any other account + The "root" (parent, billing) account creates all child accounts. The root + account has special capabilities not found in any other account

  • - An administrator in the root account by default has the OrganizationAccountAccessRole{" "} - to all other accounts (admin access) + An administrator in the root account by default has the{" "} + OrganizationAccountAccessRole to all other accounts (admin + access) +
  • +
  • + Organizational CloudTrails can only be provisioned in this account +
  • +
  • + It’s the only account that can have member accounts associated with it
  • -
  • Organizational CloudTrails can only be provisioned in this account
  • -
  • It’s the only account that can have member accounts associated with it
  • Service Control Policies can only be set in this account
  • It’s the only account that can manage the AWS Organization
@@ -76,16 +88,20 @@ It is advised to keep the names of accounts as short as possible because of reso plat-prod
-

The "production" is the account where you run your most mission-critical applications

+

+ The "production" is the account where you run your most mission-critical + applications +

plat-staging

- The “staging” account is where QA and integration tests will run for public consumption. This is production for QA - engineers and partners doing integration tests. It must be stable for third-parties to test. It runs a kubernetes - cluster. + The “staging” account is where QA and integration tests will run for + public consumption. This is production for QA engineers and partners doing + integration tests. It must be stable for third-parties to test. It runs a + kubernetes cluster.

@@ -93,8 +109,9 @@ It is advised to keep the names of accounts as short as possible because of reso

- The "sandbox" account is where you let your developers have fun and break things. Developers get admin. This is - where changes happen first. It will be used by developers who need the bleeding edge. Only DevOps work here or + The "sandbox" account is where you let your developers have fun and break + things. Developers get admin. This is where changes happen first. It will + be used by developers who need the bleeding edge. Only DevOps work here or developers trying to get net-new applications added to tools like slice.

@@ -103,9 +120,10 @@ It is advised to keep the names of accounts as short as possible because of reso

- The "dev" account is where to run automated tests, load tests infrastructure code. This is where the entire - engineering organization operates daily. It needs to be stable for developers. This environment is Production for - developers to develop code. + The "dev" account is where to run automated tests, load tests + infrastructure code. This is where the entire engineering organization + operates daily. It needs to be stable for developers. This environment is + Production for developers to develop code.

@@ -124,15 +142,19 @@ It is advised to keep the names of accounts as short as possible because of reso core-corp
-

The "corp" account is where you run the shared platform services for the company. Google calls it “corp”

+

+ The "corp" account is where you run the shared platform services for the + company. Google calls it “corp” +

core-security

- The "security" account is where to run automated security scanning software that might operate in a read-only - fashion against the audit account. + The "security" account is where to run automated security scanning + software that might operate in a read-only fashion against the audit + account.

@@ -140,26 +162,32 @@ It is advised to keep the names of accounts as short as possible because of reso

- The "identity" account is where to add users and delegate access to the other accounts and is where users log in + The "identity" account is where to add users and delegate access to the + other accounts and is where users log in

core-network
-

The “network” account is where the transit gateway is managed and all inter-account routing

+

+ The “network” account is where the transit gateway is managed and all + inter-account routing +

core-dns

- The “dns” account is the owner for all zones (may have a legal role with Route53Registrar.* + The “dns” account is the owner for all zones (may have a legal role with{" "} + Route53Registrar.* permissions). Cannot touch zones or anything else. Includes billing.

- Example use-case: Legal team needs to manage DNS and it’s easier to give them access to an account specific to DNS - rather than multiple set of resources. + Example use-case: Legal team needs to manage DNS and it’s easier to give + them access to an account specific to DNS rather than multiple sets of + resources.

@@ -167,12 +195,13 @@ It is advised to keep the names of accounts as short as possible because of reso

- The “automation” account is where any gitops automation will live. Some automation (like Spacelift) has “god” mode - in this account. + The “automation” account is where any gitops automation will live. Some + automation (like Spacelift) has “god” mode in this account.

- The network account will typically have transit gateway access to all other accounts, therefore we want to limit - what is deployed in the automation account to only those services which need it. + The network account will typically have transit gateway access to all + other accounts, therefore we want to limit what is deployed in the + automation account to only those services which need it.

@@ -180,8 +209,8 @@ It is advised to keep the names of accounts as short as possible because of reso

- This “artifacts” account is where we recommend centralizing and storing artifacts (e.g. ECR, assets, etc) for - CI/CD + This “artifacts” account is where we recommend centralizing and storing + artifacts (e.g. ECR, assets, etc) for CI/CD

@@ -189,13 +218,15 @@ It is advised to keep the names of accounts as short as possible because of reso

- For public S3 buckets, public ECRs, public AMIs, anything public. This will be the only account that doesn’t have - a SCP that blocks public s3 buckets. + For public S3 buckets, public ECRs, public AMIs, anything public. This + will be the only account that doesn’t have a SCP that blocks public s3 + buckets.

Use-cases

  • - All s3 buckets are private by default using a SCP in every account except for the public account + All s3 buckets are private by default using a SCP in every account + except for the public account
@@ -204,7 +235,8 @@ It is advised to keep the names of accounts as short as possible because of reso

- The "data" account is where the quants live =) Runs systems like Airflow, Jupyterhub, Batch processing, Redshift + The "data" account is where the quants live =) Runs systems like Airflow, + Jupyterhub, Batch processing, Redshift

@@ -212,9 +244,11 @@ It is advised to keep the names of accounts as short as possible because of reso

- The “$tenant” account is a symbolic account representing dedicated account environment. It’s architecture will - likely resemble prod. This relates to{" "} - this link + The “$tenant” account is a symbolic account representing a dedicated account + environment. Its architecture will likely resemble prod. This relates to{" "} + + this link +

diff --git a/docs/layers/accounts/design-decisions/decide-on-aws-organization-strategy.mdx b/docs/layers/accounts/design-decisions/decide-on-aws-organization-strategy.mdx index 75d74bd0c..36ef7f826 100644 --- a/docs/layers/accounts/design-decisions/decide-on-aws-organization-strategy.mdx +++ b/docs/layers/accounts/design-decisions/decide-on-aws-organization-strategy.mdx @@ -4,12 +4,21 @@ sidebar_label: "AWS Organization Strategy" sidebar_position: 1 description: Decide whether to create or reuse AWS Organizations refarch_id: REFARCH-471 +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; -When establishing your AWS infrastructure strategy, you need to decide whether to create a new AWS Organization or reuse an existing one. This decision involves evaluating the limitations and capabilities of AWS Control Tower, the special roles within the root account, and the ability to manage organizational configurations. Cloud Posse recommends starting with a new organization to ensure a clear separation between historical and next-gen infrastructure, leveraging transit gateways or VPC peering for integration. + When establishing your AWS infrastructure strategy, you need to decide whether + to create a new AWS Organization or reuse an existing one. This decision + involves evaluating the limitations and capabilities of AWS Control Tower, the + special roles within the root account, and the ability to manage + organizational configurations. Cloud Posse recommends starting with a new + organization to ensure a clear separation between historical and next-gen + infrastructure, leveraging transit gateways or VPC peering for integration. 
:::tip diff --git a/docs/layers/accounts/design-decisions/decide-on-aws-support.mdx b/docs/layers/accounts/design-decisions/decide-on-aws-support.mdx index b2cf665c9..8f0f9ba8c 100644 --- a/docs/layers/accounts/design-decisions/decide-on-aws-support.mdx +++ b/docs/layers/accounts/design-decisions/decide-on-aws-support.mdx @@ -4,12 +4,18 @@ sidebar_label: "AWS Support" sidebar_position: 3 description: "Decide which accounts need AWS Support" refarch_id: REFARCH-417 +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; -When setting up AWS Support, you need to decide which accounts require paid support plans. If you don’t have an AWS Enterprise Agreement, it’s recommended to enable Business-level support in the root account to expedite requests and manage organizational limits effectively. + When setting up AWS Support, you need to decide which accounts require paid + support plans. If you don’t have an AWS Enterprise Agreement, it’s recommended + to enable Business-level support in the root account to expedite requests and + manage organizational limits effectively. AWS Support is always enabled on a per-account basis. 
With an AWS Enterprise Agreement, AWS support is already included diff --git a/docs/layers/accounts/design-decisions/decide-on-email-address-format-for-aws-accounts.mdx b/docs/layers/accounts/design-decisions/decide-on-email-address-format-for-aws-accounts.mdx index 0f0a53eaa..57d13f010 100644 --- a/docs/layers/accounts/design-decisions/decide-on-email-address-format-for-aws-accounts.mdx +++ b/docs/layers/accounts/design-decisions/decide-on-email-address-format-for-aws-accounts.mdx @@ -4,19 +4,27 @@ sidebar_label: "AWS Email Addresses" sidebar_position: 4 description: "Decide what emails will be used for AWS Accounts" refarch_id: REFARCH-51 +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; -import Note from '@site/src/components/Note'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; +import Note from "@site/src/components/Note"; - When creating AWS accounts, you need to decide on the email address format. Each AWS account requires a unique email address that cannot be reused across multiple accounts. The chosen format should align with your organization’s email management strategy and ensure proper delivery and handling of AWS notifications. + When creating AWS accounts, you need to decide on the email address format. + Each AWS account requires a unique email address that cannot be reused across + multiple accounts. The chosen format should align with your organization’s + email management strategy and ensure proper delivery and handling of AWS + notifications. Every AWS account needs a unique email address. Email address cannot be reused across multiple AWS accounts. 
-we are referring AWS accounts that contain resources, not individual user accounts + we are referring to AWS accounts that contain resources, not individual user + accounts ### Use Plus Addressing diff --git a/docs/layers/accounts/design-decisions/decide-on-mfa-solution-for-aws-root-accounts.mdx b/docs/layers/accounts/design-decisions/decide-on-mfa-solution-for-aws-root-accounts.mdx index 6687acd46..b679ce75a 100644 --- a/docs/layers/accounts/design-decisions/decide-on-mfa-solution-for-aws-root-accounts.mdx +++ b/docs/layers/accounts/design-decisions/decide-on-mfa-solution-for-aws-root-accounts.mdx @@ -3,12 +3,20 @@ title: "Decide on MFA Solution for AWS Root Accounts" sidebar_label: "MFA Solution" description: "Decide on MFA Solution for AWS Root Accounts" refarch_id: REFARCH-50 +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; -When setting up MFA for AWS root accounts, you need to decide on the most suitable solution to ensure security and manageability. The two most common options are TOTP (Time-Based One-Time Password) and U2F (Universal 2nd Factor). Cloud Posse recommends using 1Password for Teams or 1Password for Business to securely share TOTP tokens among stakeholders, ensuring both accessibility and protection. + When setting up MFA for AWS root accounts, you need to decide on the most + suitable solution to ensure security and manageability. The two most common + options are TOTP (Time-Based One-Time Password) and U2F (Universal 2nd + Factor). Cloud Posse recommends using 1Password for Teams or 1Password for + Business to securely share TOTP tokens among stakeholders, ensuring both + accessibility and protection. We need an MFA solution for protecting the master AWS accounts. 
The two most common options are TOTP and U2F diff --git a/docs/layers/accounts/design-decisions/decide-on-terraform-state-backend-architecture.mdx b/docs/layers/accounts/design-decisions/decide-on-terraform-state-backend-architecture.mdx index b4e8777a7..a2bac7181 100644 --- a/docs/layers/accounts/design-decisions/decide-on-terraform-state-backend-architecture.mdx +++ b/docs/layers/accounts/design-decisions/decide-on-terraform-state-backend-architecture.mdx @@ -3,12 +3,19 @@ title: "Decide on Terraform State Backend Architecture" sidebar_label: "Terraform Backends" description: Decide how to organize Terraform State across accounts refarch_id: REFARCH-522 +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; -When organizing your Terraform state, you need to decide on the backend architecture. Using S3, you can either opt for a single bucket, which simplifies management but grants broad access, or multiple buckets, which enhance security by segmenting access but add complexity. Consider your company’s security and compliance needs when making this decision. + When organizing your Terraform state, you need to decide on the backend + architecture. Using S3, you can either opt for a single bucket, which + simplifies management but grants broad access, or multiple buckets, which + enhance security by segmenting access but add complexity. Consider your + company’s security and compliance needs when making this decision. 
## Context and Problem Statement diff --git a/docs/layers/accounts/design-decisions/design-decisions.mdx b/docs/layers/accounts/design-decisions/design-decisions.mdx index 8ad3e8c54..c1cd40cef 100644 --- a/docs/layers/accounts/design-decisions/design-decisions.mdx +++ b/docs/layers/accounts/design-decisions/design-decisions.mdx @@ -2,12 +2,16 @@ title: "Design Decisions" sidebar_label: "Review Design Decisions" sidebar_position: 1 +tags: + - design-decision --- -import DocCardList from '@theme/DocCardList'; -import Intro from '@site/src/components/Intro'; + +import DocCardList from "@theme/DocCardList"; +import Intro from "@site/src/components/Intro"; - These are some of the design decisions you should be aware of when provisioning a new AWS organization. + These are some of the design decisions you should be aware of when + provisioning a new AWS organization. - + diff --git a/docs/layers/alerting/design-decisions/decide-on-default-schedules.mdx b/docs/layers/alerting/design-decisions/decide-on-default-schedules.mdx index 40b37ccc4..154111dde 100644 --- a/docs/layers/alerting/design-decisions/decide-on-default-schedules.mdx +++ b/docs/layers/alerting/design-decisions/decide-on-default-schedules.mdx @@ -4,17 +4,20 @@ sidebar_label: "Default Schedules" sidebar_position: 100 refarch_id: REFARCH-520 description: Determine the on-call schedule for teams +tags: + - design-decision --- ## Context and Problem Statement By default, an opsgenie team comes with its own schedule. Sometimes however we want different schedules for different timezones. A team spread across the world would have to manually keep track of the schedule to make sure individuals are only on call for particular hours. -
+ +
## Considered Options -### Option 1 - Use one default Schedule (Recommended) +### Option 1 - Use one default Schedule (Recommended) :::tip Our Recommendation is to use Option 1 because.... @@ -42,5 +45,3 @@ Our Recommendation is to use Option 1 because.... ## References - Links to any research, ADRs or related Jiras - - diff --git a/docs/layers/alerting/design-decisions/decide-on-incident-ruleset.mdx b/docs/layers/alerting/design-decisions/decide-on-incident-ruleset.mdx index 1c55c46ba..4906cf105 100644 --- a/docs/layers/alerting/design-decisions/decide-on-incident-ruleset.mdx +++ b/docs/layers/alerting/design-decisions/decide-on-incident-ruleset.mdx @@ -4,6 +4,8 @@ sidebar_label: "Incident Ruleset" sidebar_position: 100 refarch_id: REFARCH-519 description: Determine the rules that make an alert an incident +tags: + - design-decision --- ## Context and Problem Statement @@ -69,5 +71,3 @@ Tag based approach would mean any monitor that sends an alert with a tag `incide - [How to Implement SRE with Datadog](/layers/monitoring/datadog) - [REFARCH-519 - Decide on Default Priority to Incident Mappings](https://cloudposse.atlassian.net/browse/REFARCH-519) - - diff --git a/docs/layers/alerting/design-decisions/decide-on-teams-for-escalations.mdx b/docs/layers/alerting/design-decisions/decide-on-teams-for-escalations.mdx index ce435d8e8..94c967a1f 100644 --- a/docs/layers/alerting/design-decisions/decide-on-teams-for-escalations.mdx +++ b/docs/layers/alerting/design-decisions/decide-on-teams-for-escalations.mdx @@ -4,6 +4,8 @@ sidebar_label: "Teams for Escalations" sidebar_position: 100 refarch_id: REFARCH-468 description: Determine the teams that will be responsible for incidents +tags: + - design-decision --- ## Problem @@ -58,5 +60,3 @@ teams: - username: user@ourcompany.com role: admin ``` - - diff --git a/docs/layers/alerting/design-decisions/design-decisions.mdx b/docs/layers/alerting/design-decisions/design-decisions.mdx index 8800fcff8..016b15e58 100644 --- 
a/docs/layers/alerting/design-decisions/design-decisions.mdx +++ b/docs/layers/alerting/design-decisions/design-decisions.mdx @@ -3,12 +3,16 @@ title: Design Decisions sidebar_label: Review Design Decisions sidebar_position: 1 description: Review the key design decisions for implementing incident management +tags: + - design-decision --- -import DocCardList from '@theme/DocCardList'; -import Intro from '@site/src/components/Intro'; + +import DocCardList from "@theme/DocCardList"; +import Intro from "@site/src/components/Intro"; - Review the key design decisions to determine how you'll implement incident management, escalations, and alerting. + Review the key design decisions to determine how you'll implement incident + management, escalations, and alerting. - + diff --git a/docs/layers/data/design-decisions/decide-on-amazon-managed-workflows-for-apache-airflow-mwaa-requi.mdx b/docs/layers/data/design-decisions/decide-on-amazon-managed-workflows-for-apache-airflow-mwaa-requi.mdx index 2279a39e2..a3c372f1a 100644 --- a/docs/layers/data/design-decisions/decide-on-amazon-managed-workflows-for-apache-airflow-mwaa-requi.mdx +++ b/docs/layers/data/design-decisions/decide-on-amazon-managed-workflows-for-apache-airflow-mwaa-requi.mdx @@ -3,9 +3,12 @@ title: "Decide on Amazon Managed Workflows for Apache Airflow (MWAA) Requirement sidebar_label: "Amazon Managed Workflows for Apache Airflow (MWAA) Requirements" sidebar_position: 100 refarch_id: REFARCH-491 +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Problem @@ -92,5 +95,3 @@ Create a standardized MWAA Environment based on requirements. 
- [Amazon Managed Workflows for Apache Airflow (MWAA): Create an Environment](https://docs.aws.amazon.com/mwaa/latest/userguide/create-environment.html) - [Amazon Managed Workflows for Apache Airflow (MWAA): Supported Versions](https://docs.aws.amazon.com/mwaa/latest/userguide/airflow-versions.html) - - diff --git a/docs/layers/data/design-decisions/decide-on-amazon-opensearch-service-elasticsearch-requirements.mdx b/docs/layers/data/design-decisions/decide-on-amazon-opensearch-service-elasticsearch-requirements.mdx index 2a8816953..4f138e265 100644 --- a/docs/layers/data/design-decisions/decide-on-amazon-opensearch-service-elasticsearch-requirements.mdx +++ b/docs/layers/data/design-decisions/decide-on-amazon-opensearch-service-elasticsearch-requirements.mdx @@ -3,9 +3,12 @@ title: "Decide on Amazon OpenSearch Service (Elasticsearch) Requirements" sidebar_label: "Amazon OpenSearch Service (Elasticsearch) Requirements" sidebar_position: 100 refarch_id: REFARCH-362 +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Problem @@ -48,19 +51,19 @@ Because the [Amazon OpenSearch Service](https://docs.aws.amazon.com/opensearch-s This, in addition to the requirements outlined in _v1 Infrastructure Requirements_, concludes that each Elasticsearch cluster will have the following requirements: -| **Requirement** | **Recommendation** | | -| ----------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | 
------------------------------------------------------------------------------------------------------------------- | +| **Requirement** | **Recommendation** | | +| ----------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------ | | EBS volume size | :::caution
The volume size is limited by the size of the instance.
[https://docs.aws.amazon.com/opensearch-service/latest/developerguide/limits.html](https://docs.aws.amazon.com/opensearch-service/latest/developerguide/limits.html)


::: |
| -| Number of nodes | 3 | | -| Instance family for each node | Depends on use-case | | -| Kibana | Whether or not Kibana is required: not required.


:::caution
If Kibana is required, we’ll need to discuss how to securely access Kibana. We recommend SAML authentication.

[https://docs.aws.amazon.com/opensearch-service/latest/developerguide/saml.html](https://docs.aws.amazon.com/opensearch-service/latest/developerguide/saml.html)


::: | | +| Number of nodes | 3 | | +| Instance family for each node | Depends on use-case | | +| Kibana | Whether or not Kibana is required: not required.


:::caution
If Kibana is required, we’ll need to discuss how to securely access Kibana. We recommend SAML authentication.

[https://docs.aws.amazon.com/opensearch-service/latest/developerguide/saml.html](https://docs.aws.amazon.com/opensearch-service/latest/developerguide/saml.html)


::: | | ## Consequences Provision Amazon OpenSearch Service based on these requirements using the `elasticsearch` component with terraform. - This allows for a standardized Elasticsearch cluster that satisfies the requirements required by the application stack -in each active compute environment. + in each active compute environment. - This standard size can be easily adjusted as needed, so this is an easily reversible decision. @@ -73,5 +76,3 @@ in each active compute environment. - [https://docs.aws.amazon.com/opensearch-service/latest/developerguide/saml.html](https://docs.aws.amazon.com/opensearch-service/latest/developerguide/saml.html) - [https://aws.amazon.com/blogs/aws/amazon-elasticsearch-service-is-now-amazon-opensearch-service-and-supports-opensearch-10/](https://aws.amazon.com/blogs/aws/amazon-elasticsearch-service-is-now-amazon-opensearch-service-and-supports-opensearch-10/) - - diff --git a/docs/layers/data/design-decisions/decide-on-automated-backup-requirements.mdx b/docs/layers/data/design-decisions/decide-on-automated-backup-requirements.mdx index c3eeb3764..9a3a1f73d 100644 --- a/docs/layers/data/design-decisions/decide-on-automated-backup-requirements.mdx +++ b/docs/layers/data/design-decisions/decide-on-automated-backup-requirements.mdx @@ -3,9 +3,12 @@ title: "Decide on Automated Backup Requirements" sidebar_label: "Automated Backup Requirements" sidebar_position: 100 refarch_id: REFARCH-493 +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Context and Problem Statement @@ -13,7 +16,7 @@ Describe why we are making this decision or what problem we are solving. ## Considered Options -### Option 1 (Recommended) +### Option 1 (Recommended) :::tip Our Recommendation is to use Option 1 because.... 
@@ -51,5 +54,3 @@ Our Recommendation is to use Option 1 because.... ## References - Links to any research, ADRs or related Jiras - - diff --git a/docs/layers/data/design-decisions/decide-on-aws-backup-requirements.mdx b/docs/layers/data/design-decisions/decide-on-aws-backup-requirements.mdx index d2cf2ca41..bb2ed1c73 100644 --- a/docs/layers/data/design-decisions/decide-on-aws-backup-requirements.mdx +++ b/docs/layers/data/design-decisions/decide-on-aws-backup-requirements.mdx @@ -3,13 +3,17 @@ title: "Decide on AWS Backup Requirements" sidebar_label: "AWS Backup Requirements" sidebar_position: 100 refarch_id: REFARCH-489 +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Problem ## Context + We need a standardized way to implement backup services for AWS resources (S3, databases, EC2 instances, EFS, etc. etc.) to have the ability to restore data from points in time in the event of data loss or corruption. AWS provides a managed backup service offering called AWS Backup. [https://docs.aws.amazon.com/aws-backup/latest/devguide/whatisbackup.html](https://docs.aws.amazon.com/aws-backup/latest/devguide/whatisbackup.html) @@ -19,5 +23,3 @@ We need to determine if we are opting in or opting out using AWS Backup. 
## References - [https://www.druva.com/blog/understanding-rpo-and-rto/](https://www.druva.com/blog/understanding-rpo-and-rto/) - - diff --git a/docs/layers/data/design-decisions/decide-on-aws-emr-requirements.mdx b/docs/layers/data/design-decisions/decide-on-aws-emr-requirements.mdx index da5f7f788..925167a65 100644 --- a/docs/layers/data/design-decisions/decide-on-aws-emr-requirements.mdx +++ b/docs/layers/data/design-decisions/decide-on-aws-emr-requirements.mdx @@ -3,22 +3,27 @@ title: "Decide on AWS EMR Requirements" sidebar_label: "AWS EMR Requirements" sidebar_position: 100 refarch_id: REFARCH-490 +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Problem + We need to document the requirements for the EMR cluster. ## Context + If EMR is presently deployed, the best course of action is to replicate the settings you have (share these details if that’s the case). ## Considered Options + A list of applications for the cluster. Currently supported options are: Flink, Ganglia, Hadoop, HBase, HCatalog, Hive, Hue, JupyterHub, Livy, Mahout, MXNet, Oozie, Phoenix, Pig, Presto, Spark, Sqoop, TensorFlow, Tez, Zeppelin, and ZooKeeper (as of EMR 5.25.0). For a full list of supported options, review the EMR module. 
## References -- [https://github.com/cloudposse/terraform-aws-emr-cluster#inputs](https://github.com/cloudposse/terraform-aws-emr-cluster#inputs) - +- [https://github.com/cloudposse/terraform-aws-emr-cluster#inputs](https://github.com/cloudposse/terraform-aws-emr-cluster#inputs) diff --git a/docs/layers/data/design-decisions/decide-on-aws-managed-rabbitmq-requirements.mdx b/docs/layers/data/design-decisions/decide-on-aws-managed-rabbitmq-requirements.mdx index b7a01f61b..88f463b5e 100644 --- a/docs/layers/data/design-decisions/decide-on-aws-managed-rabbitmq-requirements.mdx +++ b/docs/layers/data/design-decisions/decide-on-aws-managed-rabbitmq-requirements.mdx @@ -3,9 +3,12 @@ title: "Decide on AWS Managed RabbitMQ Requirements" sidebar_label: "AWS Managed RabbitMQ Requirements" sidebar_position: 100 refarch_id: REFARCH-488 +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Context and Problem Statement @@ -13,7 +16,7 @@ Describe why we are making this decision or what problem we are solving. ## Considered Options -### Option 1 (Recommended) +### Option 1 (Recommended) :::tip Our Recommendation is to use Option 1 because.... @@ -51,5 +54,3 @@ Our Recommendation is to use Option 1 because.... 
## References - Links to any research, ADRs or related Jiras - - diff --git a/docs/layers/data/design-decisions/decide-on-database-schema-migration-strategy.mdx b/docs/layers/data/design-decisions/decide-on-database-schema-migration-strategy.mdx index 295caceab..659ffbe64 100644 --- a/docs/layers/data/design-decisions/decide-on-database-schema-migration-strategy.mdx +++ b/docs/layers/data/design-decisions/decide-on-database-schema-migration-strategy.mdx @@ -3,9 +3,12 @@ title: "Decide on Database Schema Migration Strategy" sidebar_label: "Database Schema Migration Strategy" sidebar_position: 100 refarch_id: REFARCH-422 +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ### Problem and Context @@ -62,5 +65,3 @@ We must decided how and when to run database migrations. The strategy will depen - Works well, when it works. When it fails, it’s hard to regulate what happens with the services. - Kubernetes will keep re-attempting the migration if the job exit’s non-zero. If we squash the exit code, then we don’t realize it’s failing, unless there’s other monitoring in place. 
- - diff --git a/docs/layers/data/design-decisions/decide-on-documentdb-requirements.mdx b/docs/layers/data/design-decisions/decide-on-documentdb-requirements.mdx index da3ba944a..e3ea4626c 100644 --- a/docs/layers/data/design-decisions/decide-on-documentdb-requirements.mdx +++ b/docs/layers/data/design-decisions/decide-on-documentdb-requirements.mdx @@ -3,9 +3,12 @@ title: "Decide on DocumentDB Requirements" sidebar_label: "DocumentDB Requirements" sidebar_position: 100 refarch_id: REFARCH-479 +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Context and Problem Statement @@ -46,10 +49,10 @@ DocumentDB is deployed across three availability zones when possible. The [Amazon DocumentDB Service](https://docs.aws.amazon.com/documentdb/latest/developerguide/replication.html) recommends that read replicas are of the same instance family as the primary instance: > For consistency, these replica instances should be the same instance family, and should be left to be designated as -replica instances by the DocumentDB service rather than manually designated, in order to simplify management of the -infrastructure. -This, in addition to the requirements outlined in _v1 Infrastructure Requirements_, concludes that each DocumentDB -cluster will have the following requirements: +> replica instances by the DocumentDB service rather than manually designated, in order to simplify management of the +> infrastructure. 
+> This, in addition to the requirements outlined in _v1 Infrastructure Requirements_, concludes that each DocumentDB +> cluster will have the following requirements: - Instance family for DB instances: `[CHANGE ME]` in non-production environments, `[CHANGE ME]` in production environments @@ -58,17 +61,17 @@ cluster will have the following requirements: - Whether or not to create DB replica instances: yes, ideally create 3 (one in each of the 3 Availability Zones) - Instance family for DB replica instances: `[CHANGE ME]` in non-production environments, `[CHANGE ME]` in production -environments + environments - Backup retention period: 1 day in non-production environments (the minimum retention period), 35 days in production -environments (the maximum retention period) + environments (the maximum retention period) ## Decision Outcome Chosen option: "Create a standardized DocumentDB cluster based on current use case", because - This allows for a standardized DocumentDB cluster that satisfies the requirements required by the application stack in -each active compute environment. + each active compute environment. ## Consequences @@ -79,5 +82,3 @@ Create a DocumentDB component and tune it to the requirements outlined above. 
- [https://docs.aws.amazon.com/documentdb/latest/developerguide/replication.html](https://docs.aws.amazon.com/documentdb/latest/developerguide/replication.html) - [Primary AWS Region](/layers/network/design-decisions/decide-on-primary-aws-region) - - diff --git a/docs/layers/data/design-decisions/decide-on-dynamodb-requirements.mdx b/docs/layers/data/design-decisions/decide-on-dynamodb-requirements.mdx index 5084d5beb..8669193cd 100644 --- a/docs/layers/data/design-decisions/decide-on-dynamodb-requirements.mdx +++ b/docs/layers/data/design-decisions/decide-on-dynamodb-requirements.mdx @@ -3,14 +3,19 @@ title: "Decide on DynamoDB Requirements" sidebar_label: "DynamoDB Requirements" sidebar_position: 100 refarch_id: REFARCH-478 +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Problem **DRAFT** + Requirements for DynamoDB tables deployed to each active compute environment need to be outlined before a DynamoDB component is configured and deployed. ## Context + We need to at a minimum define the following requirements: - Read/Write capacity Mode (and related settings) @@ -38,36 +43,36 @@ Currently, DynamoDB tables are used within [CHANGE ME]. DynamoDB tables use `PAY_PER_REQUEST` billing instead of `PROVISIONED` billing because: - The DynamoDB is part of a data pipeline where the table IO operations are not entirely predictable, therefore realized -throughput may be significantly below or above the provisioned throughput throughout the day. + throughput may be significantly below or above the provisioned throughput throughout the day. - The DynamoDB table IO operations are also not entirely consistent, therefore a provisioned capacity table whose -realized throughput closely meets its provisioned throughput cover a period of one day may not do so the next day. 
+ realized throughput closely meets its provisioned throughput over a period of one day may not do so the next day. - If the provisioned capacity tables surpass their provisioned throughput, throttling will occur, unless auto-scaling is -implemented for the DynamoDB table. This involves more machinery and is only warranted for DynamoDB tables whose -realized throughput is very close to their provisioned throughput, and which need to be able to handle unpredictable -spikes from time-to-time. This is not cost-effective for a table whose realized throughput does not meet its -provisioned throughput consistently in the first place. + implemented for the DynamoDB table. This involves more machinery and is only warranted for DynamoDB tables whose + realized throughput is very close to their provisioned throughput, and which need to be able to handle unpredictable + spikes from time-to-time. This is not cost-effective for a table whose realized throughput does not meet its + provisioned throughput consistently in the first place. - Due to the reasons described above, it is more cost-effective to [CHANGE ME: PAY_PER_REQUEST OR PROVISIONED] Backups (both integrated and via AWS Backup) are configured for DynamoDB as follows: - Integrated Backup type: in production, the DynamoDB tables have integrated point-in-time-recovery (PITR) backups which -allow for a to-the-second recovery. The retention period is the maximum PITR retention period, which is 35 days. For -non-production environments, PITR is disabled. Integrated on-demand backups are not used, because AWS Backup performs -the same function. Enabling integrated PITR backups alongside AWS Backup allows for the "best of both worlds" for -DynamoDB backups — that is, the ability to restore to the second for the past 35 days, and to have periodic snapshots -for a long period of time. + allow for a to-the-second recovery. The retention period is the maximum PITR retention period, which is 35 days. 
For + non-production environments, PITR is disabled. Integrated on-demand backups are not used, because AWS Backup performs + the same function. Enabling integrated PITR backups alongside AWS Backup allows for the "best of both worlds" for + DynamoDB backups — that is, the ability to restore to the second for the past 35 days, and to have periodic snapshots + for a long period of time. - AWS Backup is disabled for non-production environments. - AWS Backup frequency: the DynamoDB tables are backed up once a month. For simplicity, this is the first day of the -month. + month. - AWS Backup lifecycle: the DynamoDB tables backups are transitioned to cold storage after some time and are eventually -deleted. The backup is moved to cold storage after [CHANGE ME] days, and is deleted after [CHANGE ME] days. This leaves a short period -in the month to restore the table from warm storage, and exactly 3 months in cold storage to recover the table. + deleted. The backup is moved to cold storage after [CHANGE ME] days, and is deleted after [CHANGE ME] days. This leaves a short period + in the month to restore the table from warm storage, and exactly 3 months in cold storage to recover the table. [TALK ABOUT WHETHER TTL IS GOING TO BE USED] @@ -99,5 +104,3 @@ Create a DynamoDB component and tune it to the outlined requirements. 
## References - [https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/HowItWorks.ReadWriteCapacityMode.html](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/HowItWorks.ReadWriteCapacityMode.html) - - diff --git a/docs/layers/data/design-decisions/decide-on-elasticache-redis-requirements.mdx b/docs/layers/data/design-decisions/decide-on-elasticache-redis-requirements.mdx index 4fc8a8d4a..741d21aad 100644 --- a/docs/layers/data/design-decisions/decide-on-elasticache-redis-requirements.mdx +++ b/docs/layers/data/design-decisions/decide-on-elasticache-redis-requirements.mdx @@ -3,9 +3,12 @@ title: "Decide on Elasticache Redis Requirements" sidebar_label: "Elasticache Redis Requirements" sidebar_position: 100 refarch_id: REFARCH-365 +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Problem @@ -37,6 +40,7 @@ Ideally, share a screenshot of any existing Elasticache redis requirements and w | Number of nodes | | | | Cluster Mode Enabled | | | | AWS Backup requirements | | [https://docs.aws.amazon.com/AmazonElastiCache/latest/red-ug/backups.html](https://docs.aws.amazon.com/AmazonElastiCache/latest/red-ug/backups.html) | + Additional options - [aws_elasticache_cluster](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/reference/elasticache_cluster) @@ -44,6 +48,7 @@ Additional options - [aws_elasticache_replication_group](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/reference/elasticache_replication_group) ## Consequences + - We’ll provision Elasticache Redis using our `elasticache-redis` component. 
- Define the catalog entries for the various Redis configurations @@ -51,6 +56,7 @@ Additional options - Enable AWS backups, as necessary ## References + - [https://docs.aws.amazon.com/AmazonElastiCache/latest/red-ug/WhatIs.html](https://docs.aws.amazon.com/AmazonElastiCache/latest/red-ug/WhatIs.html) - [https://aws.amazon.com/blogs/database/five-workload-characteristics-to-consider-when-right-sizing-amazon-elasticache-redis-clusters/](https://aws.amazon.com/blogs/database/five-workload-characteristics-to-consider-when-right-sizing-amazon-elasticache-redis-clusters/) @@ -58,5 +64,3 @@ Additional options - [https://docs.aws.amazon.com/AmazonElastiCache/latest/red-ug/nodes-select-size.html](https://docs.aws.amazon.com/AmazonElastiCache/latest/red-ug/nodes-select-size.html) - - - diff --git a/docs/layers/data/design-decisions/decide-on-msk-requirements.mdx b/docs/layers/data/design-decisions/decide-on-msk-requirements.mdx index 25d0c1239..e40802796 100644 --- a/docs/layers/data/design-decisions/decide-on-msk-requirements.mdx +++ b/docs/layers/data/design-decisions/decide-on-msk-requirements.mdx @@ -3,14 +3,19 @@ title: "Decide on MSK Requirements" sidebar_label: "MSK Requirements" sidebar_position: 100 refarch_id: REFARCH-486 +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Problem + Requirements for MSK clusters deployed to each active compute environment need to be outlined before an MSK component is configured and deployed. ## Context + Amazon MSK clusters are going to be used by applications that use Apache Kafka streams. 
## Considered Options @@ -84,5 +89,3 @@ As a best practice, CloudWatch Logs broker logging should be enabled in order to - [https://docs.aws.amazon.com/msk/latest/developerguide/msk-encryption.html](https://docs.aws.amazon.com/msk/latest/developerguide/msk-encryption.html) - [https://docs.aws.amazon.com/msk/latest/developerguide/msk-default-configuration.html](https://docs.aws.amazon.com/msk/latest/developerguide/msk-default-configuration.html) - - diff --git a/docs/layers/data/design-decisions/decide-on-rds-aurora-db-cluster-requirements.mdx b/docs/layers/data/design-decisions/decide-on-rds-aurora-db-cluster-requirements.mdx index 7e04a83af..01c689d37 100644 --- a/docs/layers/data/design-decisions/decide-on-rds-aurora-db-cluster-requirements.mdx +++ b/docs/layers/data/design-decisions/decide-on-rds-aurora-db-cluster-requirements.mdx @@ -3,11 +3,15 @@ title: "Decide on RDS Aurora DB Cluster Requirements" sidebar_label: "RDS Aurora DB Cluster Requirements" sidebar_position: 100 refarch_id: REFARCH-476 +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Problem + Requirements for Amazon Aurora DB clusters deployed to each active compute environment need to be outlined before an Amazon Aurora component is configured and deployed @@ -15,13 +19,13 @@ Amazon Aurora component is configured and deployed Amazon RDS provides MySQL and PostgreSQL-compatible relational databases that are built for the cloud with greater performance and availability at 1/10th the cost of traditional enterprise databases with the simplicity and cost-effectiveness of open source databases. RDS Aurora features a distributed, fault-tolerant, self-healing storage system that auto-scales up to 128TB per database instance. 
It delivers high performance and availability with up to 15 low-latency read replicas, point-in-time recovery, continuous backup to Amazon S3, and replication across three Availability Zones. - Amazon Aurora DB clusters (See: [Decide on RDS Technology and Architecture](/layers/data/design-decisions/decide-on-rds-technology-and-architecture)) +Amazon Aurora DB clusters (See: [Decide on RDS Technology and Architecture](/layers/data/design-decisions/decide-on-rds-technology-and-architecture)) ### Known Limitations - [Max of 15 Read Replicas](https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/Aurora.Replication.html#:~:text=An%20Aurora%20DB%20cluster%20can%20contain%20up%20to%2015%20Aurora%20Replicas.%20The%20Aurora%20Replicas%20can%20be%20distributed%20across%20the%20Availability%20Zones%20that%20a%20DB%20cluster%20spans%20within%20an%20AWS%20Region.) (we had a customer decline RDS Aurora based on this limitation) -- ~~Point-in-time recovery (PITR) is not yet supported~~ RDS Aurora now supports PITR. [https://aws.amazon.com/blogs/storage/point-in-time-recovery-and-continuous-backup-for-amazon-rds-with-aws-backup/](https://aws.amazon.com/blogs/storage/point-in-time-recovery-and-continuous-backup-for-amazon-rds-with-aws-backup/) +- ~~Point-in-time recovery (PITR) is not yet supported~~ RDS Aurora now supports PITR. [https://aws.amazon.com/blogs/storage/point-in-time-recovery-and-continuous-backup-for-amazon-rds-with-aws-backup/](https://aws.amazon.com/blogs/storage/point-in-time-recovery-and-continuous-backup-for-amazon-rds-with-aws-backup/) - Cannot be launched on public subnets @@ -36,7 +40,7 @@ Create a standardized Aurora DB cluster based on the current use case: RDS aurora replication happens at the filesystem layer versus the conventional database layer. It’s actually a shared filesystem. Hitting the read replicas hard can still impact the masters since they are using the shared filesystem. 
> Because the cluster volume is shared among all DB instances in your DB cluster, minimal additional work is required to replicate a copy of the data for each Aurora Replica. -[https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/Aurora.Replication.html](https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/Aurora.Replication.html) +> [https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/Aurora.Replication.html](https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/Aurora.Replication.html) ### RDS Serverless v1 vs v2 @@ -65,18 +69,21 @@ This, in addition to any of the requirements outlined in _v1 Infrastructure Requ #### **Setting**
-
Aurora DB cluster Engine
-
-
Aurora DB cluster Instance Family
-
-
Number of Aurora DB cluster Instances: 1 for all environments except for prod, 3 for prod (or 2 when < 3 AZs are available)
-
-
Regional or Global DB Cluster
-
-
Security-related settings
-
-
Storage Encryption enabled
-
yes
+
Aurora DB cluster Engine
+
+
Aurora DB cluster Instance Family
+
+
+ Number of Aurora DB cluster Instances: 1 for all environments except for + prod, 3 for prod (or 2 when < 3 AZs are available) +
+
+
Regional or Global DB Cluster
+
+
Security-related settings
+
+
Storage Encryption enabled
+
yes
## Other Considerations @@ -94,5 +101,3 @@ Create an Aurora DB Cluster component and tune it to the outlined requirements. - [https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/Aurora.Overview.html](https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/Aurora.Overview.html) - [https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/aurora-global-database.html](https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/aurora-global-database.html) - - diff --git a/docs/layers/data/design-decisions/decide-on-rds-technology-and-architecture.mdx b/docs/layers/data/design-decisions/decide-on-rds-technology-and-architecture.mdx index 9a4e3843b..2c3a54607 100644 --- a/docs/layers/data/design-decisions/decide-on-rds-technology-and-architecture.mdx +++ b/docs/layers/data/design-decisions/decide-on-rds-technology-and-architecture.mdx @@ -3,9 +3,12 @@ title: "Decide on RDS Technology and Architecture" sidebar_label: "RDS Technology and Architecture" sidebar_position: 100 refarch_id: REFARCH-211 +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Problem @@ -21,7 +24,7 @@ There are several ways in which an RDS Cluster can be deployed. ### **Option 1:** Amazon RDS Instances -Amazon RDS Instances are the original version of RDS and provide simple master-slave replication with multiple read replicas and multi-AZ fail-over capabilities. RDS Instances are best suited for one-off databases (e.g. for microservices or dev environments) where performance is likely not an issue and the ability to do point-in-time restores for a database is required. Point in time recovery allows you to create an additional RDS instance (e.g. 
it does replace your running instance), based on the data as it existed on your instance at any specific point in time by restoring and replaying the journal to a specific point in time. This feature is not supported yet by RDS Aurora. +Amazon RDS Instances are the original version of RDS and provide simple master-slave replication with multiple read replicas and multi-AZ fail-over capabilities. RDS Instances are best suited for one-off databases (e.g. for microservices or dev environments) where performance is likely not an issue and the ability to do point-in-time restores for a database is required. Point in time recovery allows you to create an additional RDS instance (i.e., it does not replace your running instance), based on the data as it existed on your instance at any specific point in time by restoring and replaying the journal to a specific point in time. This feature is not supported yet by RDS Aurora. [https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/USER_PIT.html](https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/USER_PIT.html) @@ -60,7 +63,7 @@ For more information, see: [Getting Started with Amazon Aurora Global Databases] ::: > Major version upgrades can contain database changes that are not backward-compatible with previous versions of the database. This functionality can cause your existing applications to stop working correctly. As a result, Amazon Aurora doesn't apply major version upgrades automatically. -[https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/USER_UpgradeDBInstance.PostgreSQL.html#USER_UpgradeDBInstance.PostgreSQL.MajorVersion](https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/USER_UpgradeDBInstance.PostgreSQL.html#USER_UpgradeDBInstance.PostgreSQL.MajorVersion)A transit gateway connection will be required between all regions and accounts participating in the Global Database. 
+> [https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/USER_UpgradeDBInstance.PostgreSQL.html#USER_UpgradeDBInstance.PostgreSQL.MajorVersion](https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/USER_UpgradeDBInstance.PostgreSQL.html#USER_UpgradeDBInstance.PostgreSQL.MajorVersion) A transit gateway connection will be required between all regions and accounts participating in the Global Database. [https://cloudposse.atlassian.net/wiki/spaces/REFARCH/pages/1175978124](/layers/data/design-decisions/decide-on-the-backup-aws-region-for-aurora-global-cluster) @@ -109,7 +112,7 @@ Once RDS is deployed, services can either use each database in a shared or dedic - This is the most economical option and achieves greater economies of scale. - The downside is that one cannot automatically restore an individual database, making recoveries from human error -slower and more manual. + slower and more manual. 2. In the dedicated model, one application database is provisioned in each database instance (or cluster): @@ -122,5 +125,3 @@ slower and more manual. ## Consequences A component for an Amazon Aurora RDS cluster will be created and provisioned in each VPC as needed. 
- - diff --git a/docs/layers/data/design-decisions/decide-on-s3-bucket-requirements.mdx b/docs/layers/data/design-decisions/decide-on-s3-bucket-requirements.mdx index 9fde9e6a9..3f581705f 100644 --- a/docs/layers/data/design-decisions/decide-on-s3-bucket-requirements.mdx +++ b/docs/layers/data/design-decisions/decide-on-s3-bucket-requirements.mdx @@ -3,9 +3,12 @@ title: "Decide on S3 Bucket Requirements" sidebar_label: "S3 Bucket Requirements" sidebar_position: 100 refarch_id: REFARCH-364 +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Problem @@ -52,5 +55,3 @@ We’ll use the [Terraform](/resources/legacy/fundamentals/terraform) to generat ## See Also [Decide on Terraform State Backend Architecture](/layers/accounts/design-decisions/decide-on-terraform-state-backend-architecture) - - diff --git a/docs/layers/data/design-decisions/decide-on-sftp-requirements.mdx b/docs/layers/data/design-decisions/decide-on-sftp-requirements.mdx index 3dce179fa..e6a23ddd9 100644 --- a/docs/layers/data/design-decisions/decide-on-sftp-requirements.mdx +++ b/docs/layers/data/design-decisions/decide-on-sftp-requirements.mdx @@ -3,9 +3,12 @@ title: "Decide on SFTP Requirements" sidebar_label: "SFTP Requirements" sidebar_position: 100 refarch_id: REFARCH-485 +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Problem **DRAFT** @@ -34,5 +37,3 @@ import KeyPoints from '@site/src/components/KeyPoints'; - [https://aws.amazon.com/blogs/storage/using-okta-as-an-identity-provider-with-aws-transfer-for-sftp/](https://aws.amazon.com/blogs/storage/using-okta-as-an-identity-provider-with-aws-transfer-for-sftp/) 
- [https://github.com/cloudposse/terraform-aws-transfer-sftp](https://github.com/cloudposse/terraform-aws-transfer-sftp) - - diff --git a/docs/layers/data/design-decisions/decide-on-the-backup-aws-region-for-aurora-global-cluster.mdx b/docs/layers/data/design-decisions/decide-on-the-backup-aws-region-for-aurora-global-cluster.mdx index aede7c847..457893082 100644 --- a/docs/layers/data/design-decisions/decide-on-the-backup-aws-region-for-aurora-global-cluster.mdx +++ b/docs/layers/data/design-decisions/decide-on-the-backup-aws-region-for-aurora-global-cluster.mdx @@ -3,9 +3,12 @@ title: "Decide on the backup AWS region for Aurora Global Cluster" sidebar_label: "Backup AWS region for Aurora Global Cluster" sidebar_position: 100 refarch_id: REFARCH-242 +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Considerations @@ -21,10 +24,8 @@ Consequences of deploying RDS Aurora Global Cluster include: - Provisioning additional VPC and Aurora clusters in the backup region - Setting up peering via the transit gateway because write operations go directly to the primary DB instance in the primary AWS Region -::: + ::: ### Related - [Decide on RDS Technology and Architecture](/layers/data/design-decisions/decide-on-rds-technology-and-architecture) - - diff --git a/docs/layers/data/design-decisions/decide-whether-to-use-rds-iam-authentication.mdx b/docs/layers/data/design-decisions/decide-whether-to-use-rds-iam-authentication.mdx index 4c675fd41..9ab662a99 100644 --- a/docs/layers/data/design-decisions/decide-whether-to-use-rds-iam-authentication.mdx +++ b/docs/layers/data/design-decisions/decide-whether-to-use-rds-iam-authentication.mdx @@ -3,9 +3,12 @@ title: "Decide Whether to Use RDS IAM Authentication" sidebar_label: "RDS IAM Authentication" sidebar_position: 100 refarch_id: 
REFARCH-210 +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Problem @@ -16,7 +19,7 @@ Multiple ways exist to authenticate with the database. Static credentials grow s RDS supports IAM authentication, which means IAM credentials are used to obtain short-lived credentials to access the RDS database. Leveraging RDS IAM Authentication in applications requires application changes to leverage the AWS SDK ([Java Example](https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/UsingWithRDS.IAMDBAuth.Connecting.Java.html)). :::caution - RDS IAM authentication is not recommended for applications due to a maximum of 200 new connections per second, and therefore only advisable for use with human operators. +RDS IAM authentication is not recommended for applications due to a maximum of 200 new connections per second, and therefore only advisable for use with human operators. ::: For applications, the AWS recommended method is using AWS Secrets Manager (as opposed to RDS IAM Authentication) which also has the built-in capability to rotate credentials. 
@@ -34,5 +37,3 @@ If we choose to enable RDS IAM Authentication, it’s just a simple feature flag - [https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/UsingWithRDS.IAMDBAuth.Connecting.Java.html](https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/UsingWithRDS.IAMDBAuth.Connecting.Java.html) - [Use SSM over ASM for Infrastructure](/resources/adrs/adopted/use-ssm-over-asm-for-infrastructure) - - diff --git a/docs/layers/data/design-decisions/design-decisions.mdx b/docs/layers/data/design-decisions/design-decisions.mdx index 781ca50bb..5279c5313 100644 --- a/docs/layers/data/design-decisions/design-decisions.mdx +++ b/docs/layers/data/design-decisions/design-decisions.mdx @@ -2,12 +2,16 @@ title: Design Decisions sidebar_label: Review Design Decisions sidebar_position: 1 +tags: + - design-decision --- -import DocCardList from '@theme/DocCardList'; -import Intro from '@site/src/components/Intro'; + +import DocCardList from "@theme/DocCardList"; +import Intro from "@site/src/components/Intro"; -Review the key design decisions for the data layer, including which services you will rely on on their configurations. + Review the key design decisions for the data layer, including which services + you will rely on and their configurations.
- + diff --git a/docs/layers/data/design-decisions/todo-decide-on-rds-aurora-serverless-requirements.mdx b/docs/layers/data/design-decisions/todo-decide-on-rds-aurora-serverless-requirements.mdx index de0bbc37c..b102431a1 100644 --- a/docs/layers/data/design-decisions/todo-decide-on-rds-aurora-serverless-requirements.mdx +++ b/docs/layers/data/design-decisions/todo-decide-on-rds-aurora-serverless-requirements.mdx @@ -3,13 +3,15 @@ title: "(TODO) Decide on RDS Aurora Serverless Requirements" sidebar_label: "(TODO) RDS Aurora Serverless Requirements" sidebar_position: 100 refarch_id: REFARCH-472 +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Context and Problem Statement ## Considered Options ## References - diff --git a/docs/layers/data/design-decisions/todo-decide-on-rds-instance-requirements.mdx b/docs/layers/data/design-decisions/todo-decide-on-rds-instance-requirements.mdx index 5d16a9bd9..41424105f 100644 --- a/docs/layers/data/design-decisions/todo-decide-on-rds-instance-requirements.mdx +++ b/docs/layers/data/design-decisions/todo-decide-on-rds-instance-requirements.mdx @@ -3,9 +3,12 @@ title: "(TODO) Decide on RDS Instance Requirements" sidebar_label: "(TODO) RDS Instance Requirements" sidebar_position: 100 refarch_id: REFARCH-477 +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Problem **DRAFT** @@ -24,5 +27,3 @@ RDS Global Databases are only compatible with RDS Aurora. 
See [https://cloudposs - [https://docs.aws.amazon.com/whitepapers/latest/cost-optimization-right-sizing/tips-for-right-sizing-your-workloads.html](https://docs.aws.amazon.com/whitepapers/latest/cost-optimization-right-sizing/tips-for-right-sizing-your-workloads.html) - [https://docs.aws.amazon.com/whitepapers/latest/cost-optimization-right-sizing/tips-for-right-sizing-your-workloads.html](https://docs.aws.amazon.com/whitepapers/latest/cost-optimization-right-sizing/tips-for-right-sizing-your-workloads.html) - - diff --git a/docs/layers/ecs/design-decisions/decide-on-ecs-load-balancer-requirements.mdx b/docs/layers/ecs/design-decisions/decide-on-ecs-load-balancer-requirements.mdx index 0062d6f25..1995099df 100644 --- a/docs/layers/ecs/design-decisions/decide-on-ecs-load-balancer-requirements.mdx +++ b/docs/layers/ecs/design-decisions/decide-on-ecs-load-balancer-requirements.mdx @@ -4,9 +4,12 @@ sidebar_label: "Load Balancer" sidebar_position: 1 description: Decide how many ECS load balancers are needed refarch_id: REFARCH-414 +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; # Decide on ECS load balancer requirements diff --git a/docs/layers/ecs/design-decisions/decide-on-the-application-service-log-destination-for-ecs.mdx b/docs/layers/ecs/design-decisions/decide-on-the-application-service-log-destination-for-ecs.mdx index dbfaf0c26..0812fcee6 100644 --- a/docs/layers/ecs/design-decisions/decide-on-the-application-service-log-destination-for-ecs.mdx +++ b/docs/layers/ecs/design-decisions/decide-on-the-application-service-log-destination-for-ecs.mdx @@ -4,11 +4,14 @@ sidebar_label: "Service Logs" sidebar_position: 2 description: Decide how to group logs with CloudWatch refarch_id: REFARCH-484 +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; 
-import KeyPoints from '@site/src/components/KeyPoints'; -import Tabs from '@theme/Tabs'; -import TabItem from '@theme/TabItem'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; +import Tabs from "@theme/Tabs"; +import TabItem from "@theme/TabItem"; ## Context and Problem Statement @@ -40,6 +43,7 @@ Should logs be sent to datadog or log only to cloudwatch logs? - Fargate cpu/mem requirements would go up per task - More expensive due to fargate pricing is by cpu/memory + @@ -75,6 +79,7 @@ Should logs be sent to datadog or log only to cloudwatch logs? - Maintaining upgrades for the lambdas - Monitoring lambdas + @@ -106,5 +111,6 @@ Should logs be sent to datadog or log only to cloudwatch logs? - Monitoring a fluentd cluster - We don’t have prior art for this. + diff --git a/docs/layers/ecs/design-decisions/design-decisions.mdx b/docs/layers/ecs/design-decisions/design-decisions.mdx index 26e116e2a..fe86a1b27 100644 --- a/docs/layers/ecs/design-decisions/design-decisions.mdx +++ b/docs/layers/ecs/design-decisions/design-decisions.mdx @@ -2,12 +2,16 @@ title: Review Design Decisions sidebar_label: Review Decisions sidebar_position: 0 +tags: + - design-decision --- -import DocCardList from '@theme/DocCardList'; -import Intro from '@site/src/components/Intro'; + +import DocCardList from "@theme/DocCardList"; +import Intro from "@site/src/components/Intro"; - Review the key design decisions for ECS. These decisions relate to how you will provision your Elastic Container Service clusters on AWS. + Review the key design decisions for ECS. These decisions relate to how you + will provision your Elastic Container Service clusters on AWS. 
- + diff --git a/docs/layers/eks/design-decisions/decide-on-default-storage-class.mdx b/docs/layers/eks/design-decisions/decide-on-default-storage-class.mdx index 1ad28651d..907555f17 100644 --- a/docs/layers/eks/design-decisions/decide-on-default-storage-class.mdx +++ b/docs/layers/eks/design-decisions/decide-on-default-storage-class.mdx @@ -2,12 +2,20 @@ title: "Decide on Default Storage Class for EKS Clusters" sidebar_label: "Default Storage Class" description: Determine the default storage class for Kubernetes EKS clusters +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; -When provisioning EKS (Kubernetes) clusters, there is no one-size-fits-all recommendation for the default storage class. The right choice depends on your workload’s specific requirements, such as performance, scalability, and cost-efficiency. While only one storage class can be set as the default, storage classes are not mutually exclusive, and the best solution may often involve using a combination of classes to meet various needs. + When provisioning EKS (Kubernetes) clusters, there is no one-size-fits-all + recommendation for the default storage class. The right choice depends on your + workload’s specific requirements, such as performance, scalability, and + cost-efficiency. While only one storage class can be set as the default, + storage classes are not mutually exclusive, and the best solution may often + involve using a combination of classes to meet various needs. A `StorageClass` in Kubernetes defines the type of storage (e.g., EBS, EFS, etc.) and its parameters (e.g., performance, replication) for dynamically provisioning Persistent Volumes. 
The default `StorageClass` is automatically used when a `PersistentVolumeClaim` (PVC) is created without specifying a specific storage class, but its configuration varies depending on the cluster setup and cloud provider. Storage classes available on AWS differ from other clouds. @@ -17,10 +25,24 @@ A `StorageClass` in Kubernetes defines the type of storage (e.g., EBS, EFS, etc. We need to decide between **Amazon EFS (Elastic File System)** and **Amazon EBS (Elastic Block Store)** as the default storage class for our EKS clusters. -- **Availability Zone Lock-in:** EBS volumes are restricted to a single Availability Zone, which can impact high availability and disaster recovery strategies. This is a key drawback of using EBS. If you need a Pod to recover across multiple AZs, EFS is a more suitable option, though it comes at a higher cost. -- **Performance:** EFS generally offers lower performance when compared to EBS. This can be mitigated by paying for additional bandwidth but has routinely caused outages due to throttling even with low-performance applications. Additionally, poor lock performance makes EFS completely unsuitable for high-performance applications like RDBMS. -- **Cost:** EFS is significantly more expensive than EBS, at least 3x the price per GB and potentially more depending on performance demands, although there may be some savings from not having to reserve size for future growth. -- **Concurrent Access:** EBS volumes can only be attached to one instance at a time within the same Availability Zone, making them unsuitable for scenarios that require concurrent access from multiple instances. In contrast, EFS allows multiple instances or Pods to access the same file system concurrently, which is useful for distributed applications or workloads requiring shared storage across multiple nodes. 
+ - **Availability Zone Lock-in:** EBS volumes are restricted to a single + Availability Zone, which can impact high availability and disaster recovery + strategies. This is a key drawback of using EBS. If you need a Pod to recover + across multiple AZs, EFS is a more suitable option, though it comes at a + higher cost. - **Performance:** EFS generally offers lower performance when + compared to EBS. This can be mitigated by paying for additional bandwidth but + has routinely caused outages due to throttling even with low-performance + applications. Additionally, poor lock performance makes EFS completely + unsuitable for high-performance applications like RDBMS. - **Cost:** EFS is + significantly more expensive than EBS, at least 3x the price per GB and + potentially more depending on performance demands, although there may be some + savings from not having to reserve size for future growth. - **Concurrent + Access:** EBS volumes can only be attached to one instance at a time within + the same Availability Zone, making them unsuitable for scenarios that require + concurrent access from multiple instances. In contrast, EFS allows multiple + instances or Pods to access the same file system concurrently, which is useful + for distributed applications or workloads requiring shared storage across + multiple nodes. ## Amazon EFS @@ -28,12 +50,14 @@ We need to decide between **Amazon EFS (Elastic File System)** and **Amazon EBS **Amazon EFS** provides a scalable, fully managed, elastic file system with NFS compatibility, designed for use with AWS Cloud services and on-premises resources. ### Pros: + - **Unlimited Disk Space:** Automatically scales storage capacity as needed without manual intervention. - **Shared Access:** Allows multiple pods to access the same file system concurrently, facilitating shared storage scenarios. - **Managed Service:** Fully managed by AWS, reducing operational overhead for maintenance and scaling. 
- **Availability Zone Failover**: For workloads that require failover across multiple Availability Zones, EFS is a more suitable option. It provides multi-AZ durability, ensuring that Pods can recover and access persistent storage seamlessly across different AZs. ### Cons: + - **Lower Performance:** Generally offers lower performance compared to EBS, with throughput as low as 100 MB/s, which may not meet the demands of even modestly demanding applications. - **Higher Cost:** Significantly more expensive than EBS, at least 3x the price per GB and potentially more depending on performance demands, although there may be some savings from not having to reserve size for future growth. - **Higher Latency:** Higher latency compared to EBS, which may impact performance-sensitive applications. @@ -44,13 +68,15 @@ We need to decide between **Amazon EFS (Elastic File System)** and **Amazon EBS **Amazon EBS** provides high-performance block storage volumes for use with Amazon EC2 instances, suitable for a wide range of workloads. ### Pros: + - **Higher Performance:** Offers high IOPS and low latency, making it ideal for performance-critical applications. - **Cost-Effective:** Potentially lower costs for specific storage types and usage scenarios. - **Native EKS Integration:** Well-integrated with Kubernetes through the EBS CSI (Container Storage Interface) driver, facilitating seamless provisioning and management. - **Supports Snapshot and Backup:** Supports snapshotting for data backup, recovery, and cloning. ### Cons: -- **Single-Attach Limitation:** EBS volumes can only be attached to a single node at a time, limiting shared access across multiple Pods or instances. Additional configurations or alternative storage solutions are required for scenarios needing concurrent access. + +- **Single-Attach Limitation:** EBS volumes can only be attached to a single node at a time, limiting shared access across multiple Pods or instances. 
Additional configurations or alternative storage solutions are required for scenarios needing concurrent access. - **Availability Zones:** EBS volumes are confined to a single Availability Zone, limiting high availability and disaster recovery across zones. This limitation can be mitigated by configuring a `StatefulSet` with replicas spread across multiple AZs. However, for workloads using EBS-backed Persistent Volume Claims (PVCs), failover to a different AZ requires manual intervention, including provisioning a new volume in the target zone, as EBS volumes cannot be moved between zones. - **Non-Elastic Storage:** EBS volumes can be manually resized, but this process is not fully automated in EKS. After resizing an EBS volume, additional manual steps are required to expand the associated Persistent Volume (PV) and Persistent Volume Claim (PVC). This introduces operational complexity, especially for workloads with dynamic storage needs, as EBS lacks automatic scaling like EFS. diff --git a/docs/layers/eks/design-decisions/decide-on-eks-node-pool-architecture.mdx b/docs/layers/eks/design-decisions/decide-on-eks-node-pool-architecture.mdx index d0c8d3efd..ea5115d58 100644 --- a/docs/layers/eks/design-decisions/decide-on-eks-node-pool-architecture.mdx +++ b/docs/layers/eks/design-decisions/decide-on-eks-node-pool-architecture.mdx @@ -3,9 +3,12 @@ title: "Decide on EKS Node Pool Architecture" sidebar_label: "Node Pool Architecture" refarch_id: REFARCH-236 description: Decide on the architecture of the EKS node pools +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; Kubernetes has a concept of Node Pools, which are basically pools of computing resources. Node pools are where the scheduler dispatches workloads based on the taints/tolerations of nodes and pods. 
diff --git a/docs/layers/eks/design-decisions/decide-on-email-address-for-cert-manager-support-emails.mdx b/docs/layers/eks/design-decisions/decide-on-email-address-for-cert-manager-support-emails.mdx index 4b1ef1c5b..aebb7893d 100644 --- a/docs/layers/eks/design-decisions/decide-on-email-address-for-cert-manager-support-emails.mdx +++ b/docs/layers/eks/design-decisions/decide-on-email-address-for-cert-manager-support-emails.mdx @@ -3,9 +3,12 @@ title: "Decide on email address for cert-manager support emails" sidebar_label: "Email Address for `cert-manager`" refarch_id: REFARCH-202 description: Decide what address to use `cert-manager` support emails +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Context and Problem Statement diff --git a/docs/layers/eks/design-decisions/decide-on-helm-chart-repository-strategy.mdx b/docs/layers/eks/design-decisions/decide-on-helm-chart-repository-strategy.mdx index dd5cb9382..d590abeab 100644 --- a/docs/layers/eks/design-decisions/decide-on-helm-chart-repository-strategy.mdx +++ b/docs/layers/eks/design-decisions/decide-on-helm-chart-repository-strategy.mdx @@ -3,9 +3,12 @@ title: "Decide on Helm Chart Repository Strategy" sidebar_label: "Helm Charts" refarch_id: REFARCH-207 description: Decide where to host and manage Helm charts +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Problem diff --git a/docs/layers/eks/design-decisions/decide-on-host-os-flavor-for-eks.mdx b/docs/layers/eks/design-decisions/decide-on-host-os-flavor-for-eks.mdx index 37b0b4e41..88b8bf13b 100644 --- 
a/docs/layers/eks/design-decisions/decide-on-host-os-flavor-for-eks.mdx +++ b/docs/layers/eks/design-decisions/decide-on-host-os-flavor-for-eks.mdx @@ -3,9 +3,12 @@ title: "Decide on Host OS Flavor for EKS" sidebar_label: "Host OS Flavor for EKS" refarch_id: REFARCH-49 description: Decide on the AMI for EKS cluster nodes +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Problem diff --git a/docs/layers/eks/design-decisions/decide-on-kubernetes-ingress-controller-s.mdx b/docs/layers/eks/design-decisions/decide-on-kubernetes-ingress-controller-s.mdx index 07629ccb1..ecb11ab79 100644 --- a/docs/layers/eks/design-decisions/decide-on-kubernetes-ingress-controller-s.mdx +++ b/docs/layers/eks/design-decisions/decide-on-kubernetes-ingress-controller-s.mdx @@ -3,9 +3,12 @@ title: "Decide on Kubernetes Ingress Controller(s)" sidebar_label: "Kubernetes Ingress Controller(s)" refarch_id: REFARCH-300 description: Decide which Kubernetes Ingress Controller(s) to use with EKS +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Considerations diff --git a/docs/layers/eks/design-decisions/design-decisions.mdx b/docs/layers/eks/design-decisions/design-decisions.mdx index 19263f31b..7369801f9 100644 --- a/docs/layers/eks/design-decisions/design-decisions.mdx +++ b/docs/layers/eks/design-decisions/design-decisions.mdx @@ -2,12 +2,16 @@ title: Review Design Decisions sidebar_label: Review Decisions sidebar_position: 1 +tags: + - design-decision --- -import DocCardList from '@theme/DocCardList'; -import Intro from '@site/src/components/Intro'; + +import DocCardList from "@theme/DocCardList"; +import 
Intro from "@site/src/components/Intro"; - Review the key design decisions for EKS. These decisions relate to how you will provision your Kubernetes clusters on AWS. + Review the key design decisions for EKS. These decisions relate to how you + will provision your Kubernetes clusters on AWS. - + diff --git a/docs/layers/github-actions/design-decisions/decide-on-self-hosted-runner-architecture.mdx b/docs/layers/github-actions/design-decisions/decide-on-self-hosted-runner-architecture.mdx index 0151b12cb..15caace16 100644 --- a/docs/layers/github-actions/design-decisions/decide-on-self-hosted-runner-architecture.mdx +++ b/docs/layers/github-actions/design-decisions/decide-on-self-hosted-runner-architecture.mdx @@ -2,13 +2,21 @@ title: "Decide on Self-Hosted Runner Architecture" sidebar_label: Runner Architecture description: Decide how to create self-hosted runners +tags: + - design-decision --- import Intro from "@site/src/components/Intro"; -import Note from '@site/src/components/Note'; +import Note from "@site/src/components/Note"; -Decide on how to operate self-hosted runners that are used to run GitHub Actions workflows. These runners can be set up in various ways and allow us to avoid platform fees while running CI jobs in private infrastructure, enabling access to VPC resources. This approach is ideal for private repositories, providing control over instance size, architecture, and control costs by leveraging spot instances. The right choice depends on your platform, whether you’re using predominantly EKS, ECS, or Lambda. + Decide on how to operate self-hosted runners that are used to run GitHub + Actions workflows. These runners can be set up in various ways and allow us to + avoid platform fees while running CI jobs in private infrastructure, enabling + access to VPC resources. This approach is ideal for private repositories, + providing control over instance size, architecture, and control costs by + leveraging spot instances. 
The right choice depends on your platform, whether + you’re using predominantly EKS, ECS, or Lambda. ## Problem @@ -58,7 +66,14 @@ However, there are some limitations to the official Runner Sets implementation: The runner image used by Runner Sets contains [no more packages than are necessary](https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners-with-actions-runner-controller/about-actions-runner-controller#about-the-runner-container-image) to run the runner. This is in contrast to the Summerwind implementation, which contains some commonly needed packages like `build-essential`, `curl`, `wget`, `git`, and `jq`, and the GitHub hosted images which contain a robust set of tools. (This is a limitation of the official Runner Sets implementation, not this component per se.) You will need to install any tools you need in your workflows, either as part of your workflow (recommended), by maintaining a [custom runner image](https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners-with-actions-runner-controller/about-actions-runner-controller#creating-your-own-runner-image), or by running such steps in a [separate container](https://docs.github.com/en/actions/using-jobs/running-jobs-in-a-container) that has the tools pre-installed. Many tools have publicly available actions to install them, such as `actions/setup-node` to install NodeJS or `dcarbone/install-jq-action` to install `jq`. You can also install packages using `awalsh128/cache-apt-pkgs-action`, which has the advantage of being able to skip the installation if the package is already installed, so you can more efficiently run the same workflow on GitHub hosted as well as self-hosted runners. - There are (as of this writing) open feature requests to add some commonly needed packages to the official Runner Sets runner image. 
You can upvote these requests [here](https://github.com/actions/actions-runner-controller/discussions/3168) and [here](https://github.com/orgs/community/discussions/80868) to help get them implemented. + + There are (as of this writing) open feature requests to add some commonly + needed packages to the official Runner Sets runner image. You can upvote + these requests + [here](https://github.com/actions/actions-runner-controller/discussions/3168) + and [here](https://github.com/orgs/community/discussions/80868) to help get + them implemented. + - #### Docker in Docker (dind) mode only diff --git a/docs/layers/github-actions/design-decisions/decide-on-self-hosted-runner-placement.mdx b/docs/layers/github-actions/design-decisions/decide-on-self-hosted-runner-placement.mdx index bec905b65..6e2633249 100644 --- a/docs/layers/github-actions/design-decisions/decide-on-self-hosted-runner-placement.mdx +++ b/docs/layers/github-actions/design-decisions/decide-on-self-hosted-runner-placement.mdx @@ -2,11 +2,17 @@ title: "Decide on Self-Hosted Runner Placement" sidebar_label: Runner Placement description: Decide where to place self-hosted runners in your AWS organization +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; + +import Intro from "@site/src/components/Intro"; -Self-hosted runners are custom runners that we use to run GitHub Actions workflows. We can use these runners to access resources in our private networks and reduce costs by using our own infrastructure. We need to decide where to place these runners in your AWS organization. + Self-hosted runners are custom runners that we use to run GitHub Actions + workflows. We can use these runners to access resources in our private + networks and reduce costs by using our own infrastructure. We need to decide + where to place these runners in your AWS organization. 
## Problem diff --git a/docs/layers/github-actions/design-decisions/design-decisions.mdx b/docs/layers/github-actions/design-decisions/design-decisions.mdx index 1acf7f33c..9a1f52922 100644 --- a/docs/layers/github-actions/design-decisions/design-decisions.mdx +++ b/docs/layers/github-actions/design-decisions/design-decisions.mdx @@ -2,12 +2,17 @@ title: Design Decisions sidebar_label: Review Design Decisions sidebar_position: 1 +tags: + - design-decision --- -import DocCardList from '@theme/DocCardList'; -import Intro from '@site/src/components/Intro'; + +import DocCardList from "@theme/DocCardList"; +import Intro from "@site/src/components/Intro"; -Review the key design decisions of the GitHub Action Layer. These decisions relate to how you will manage self-hosted runners for your GitHub Action workflows. + Review the key design decisions of the GitHub Action Layer. These decisions + relate to how you will manage self-hosted runners for your GitHub Action + workflows. - + diff --git a/docs/layers/gitops/design-decisions/design-decisions.mdx b/docs/layers/gitops/design-decisions/design-decisions.mdx index d27767cd9..c4215733e 100644 --- a/docs/layers/gitops/design-decisions/design-decisions.mdx +++ b/docs/layers/gitops/design-decisions/design-decisions.mdx @@ -2,12 +2,16 @@ title: Design Decisions sidebar_label: Review Design Decisions sidebar_position: 1 +tags: + - design-decision --- -import DocCardList from '@theme/DocCardList'; -import Intro from '@site/src/components/Intro'; + +import DocCardList from "@theme/DocCardList"; +import Intro from "@site/src/components/Intro"; -Review the key design decisions for GitOps with Terraform. These decisions relate to how you will implement GitHub Actions for Terraform with Atmos. + Review the key design decisions for GitOps with Terraform. These decisions + relate to how you will implement GitHub Actions for Terraform with Atmos. 
- + diff --git a/docs/layers/identity/design-decisions/decide-on-aws-cli-login.mdx b/docs/layers/identity/design-decisions/decide-on-aws-cli-login.mdx index a16607874..80b294a28 100644 --- a/docs/layers/identity/design-decisions/decide-on-aws-cli-login.mdx +++ b/docs/layers/identity/design-decisions/decide-on-aws-cli-login.mdx @@ -2,14 +2,18 @@ title: "Decide on AWS CLI Login" sidebar_label: "AWS CLI Login" description: Decide on a CLI tool that enables AWS login and credentials via SAML IDP for CLI and web console access. +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; -import Note from '@site/src/components/Note'; -import TaskList from '@site/src/components/TaskList'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; +import Note from "@site/src/components/Note"; +import TaskList from "@site/src/components/TaskList"; -Decide on a CLI tool that enables AWS login and credentials via SAML IDP for CLI and web console access. + Decide on a CLI tool that enables AWS login and credentials via SAML IDP for + CLI and web console access. 
## Problem diff --git a/docs/layers/identity/design-decisions/decide-on-idp-integration.mdx b/docs/layers/identity/design-decisions/decide-on-idp-integration.mdx index 4ebe2ba86..bc83001d1 100644 --- a/docs/layers/identity/design-decisions/decide-on-idp-integration.mdx +++ b/docs/layers/identity/design-decisions/decide-on-idp-integration.mdx @@ -2,12 +2,19 @@ title: Decide on Identity Provider (IdP) Integration Method sidebar_label: IdP Integration Method description: Decide how to use Identity Provider (IdP) with AWS +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; -Ensure your organization can efficiently and securely manage access to AWS resources. By choosing the appropriate IdP integration method, either AWS Identity Center (SSO) or AWS SAML, you can align your authentication processes with their operational structures, avoiding potential overlaps and inefficiencies. + Ensure your organization can efficiently and securely manage access to AWS + resources. By choosing the appropriate IdP integration method, either AWS + Identity Center (SSO) or AWS SAML, you can align your authentication processes + with their operational structures, avoiding potential overlaps and + inefficiencies. ## Problem @@ -30,20 +37,20 @@ Cloud Posse supports both AWS SAML and AWS Identity Center (AWS SSO) for authent options. - **[AWS SAML 2.0 based federation](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_providers_saml.html)** is -provisioned in the centralized identity account and then permits roles to assume access roles in other accounts across -the organization. It works well with multiple IdPs, enabling roles to be programmatically associated with specific -providers. 
Aside from the obvious benefit of using AWS SAML to provide a single authentication page for users, is that -the benefit that the AWS SAML approach enables granular control over all the mechanics, giving administrators ultimate -control over how it works. Internally, Cloud Posse uses AWS SAML to authenticate with all customers to access many AWS -Organizations easily. + provisioned in the centralized identity account and then permits roles to assume access roles in other accounts across + the organization. It works well with multiple IdPs, enabling roles to be programmatically associated with specific + providers. Aside from the obvious benefit of using AWS SAML to provide a single authentication page for users, is that + the benefit that the AWS SAML approach enables granular control over all the mechanics, giving administrators ultimate + control over how it works. Internally, Cloud Posse uses AWS SAML to authenticate with all customers to access many AWS + Organizations easily. - **[AWS Identity Center (AWS SSO)](https://aws.amazon.com/iam/identity-center/)**, alternatively, is deployed for a -single AWS Organization. With AWS Identity Center, we have a single access page for all accounts in the Organization and -can connect directly to a given account. **AWS Identity Center is the recommended choice for customers**, given that -most customers manage a single AWS Organization, and the single login page is the most user-friendly option. It's also -ideal for business users, requiring no additional software configuration like Leapp to access resources through the AWS -web console. However, it is limited to a single IdP, so companies that depend on multiple IdP's should consider other -options. + single AWS Organization. With AWS Identity Center, we have a single access page for all accounts in the Organization and + can connect directly to a given account. 
**AWS Identity Center is the recommended choice for customers**, given that + most customers manage a single AWS Organization, and the single login page is the most user-friendly option. It's also + ideal for business users, requiring no additional software configuration like Leapp to access resources through the AWS + web console. However, it is limited to a single IdP, so companies that depend on multiple IdP's should consider other + options. Both options can be deployed simultaneously. You can choose to have both or either option deployed. diff --git a/docs/layers/identity/design-decisions/decide-on-idp.mdx b/docs/layers/identity/design-decisions/decide-on-idp.mdx index d605522c6..ff8d88219 100644 --- a/docs/layers/identity/design-decisions/decide-on-idp.mdx +++ b/docs/layers/identity/design-decisions/decide-on-idp.mdx @@ -2,14 +2,19 @@ title: "Decide on Identity Provider (IdP)" sidebar_label: Identity Provider (IdP) description: Decide which Identity Provider (IdP) to use with AWS +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; -import Note from '@site/src/components/Note'; -import TaskList from '@site/src/components/TaskList'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; +import Note from "@site/src/components/Note"; +import TaskList from "@site/src/components/TaskList"; -Simplify AWS authentication by leveraging existing email providers or Identity Providers (IdPs), ensuring streamlined access management and ease of use for your team. + Simplify AWS authentication by leveraging existing email providers or Identity + Providers (IdPs), ensuring streamlined access management and ease of use for + your team. ## Problem @@ -34,9 +39,11 @@ already have a specialized one, such as Okta, Auth0, or JumpCloud. Follow the steps below to integrate your IdP of choice with AWS. 
-Cloud Posse requires this information for your team to sign in to the new AWS Accounts. - -- [ ] Please create a temporary User in your IdP for the Cloud Posse Team. The Cloud Posse Team will use this account to verify - access to several resources. For example `cloudposse@acme.com`. - + Cloud Posse requires this information for your team to sign in to the new AWS + Accounts. + + - [ ] Please create a temporary User in your IdP for the Cloud Posse Team. + The Cloud Posse Team will use this account to verify access to several + resources. For example `cloudposse@acme.com`. + diff --git a/docs/layers/identity/design-decisions/design-decisions.mdx b/docs/layers/identity/design-decisions/design-decisions.mdx index c369aa070..4c7245a2c 100644 --- a/docs/layers/identity/design-decisions/design-decisions.mdx +++ b/docs/layers/identity/design-decisions/design-decisions.mdx @@ -2,12 +2,17 @@ title: Design Decisions sidebar_label: Review Design Decisions sidebar_position: 1 +tags: + - design-decision --- -import DocCardList from '@theme/DocCardList'; -import Intro from '@site/src/components/Intro'; + +import DocCardList from "@theme/DocCardList"; +import Intro from "@site/src/components/Intro"; -Review the key design decisions of the Identity Layer. These decisions relate to how you will manage identity and access management (IAM) in your AWS accounts together with your Identity Provider (IdP). + Review the key design decisions of the Identity Layer. These decisions relate + to how you will manage identity and access management (IAM) in your AWS + accounts together with your Identity Provider (IdP). 
- + diff --git a/docs/layers/monitoring/design-decisions/design-decisions.mdx b/docs/layers/monitoring/design-decisions/design-decisions.mdx index 788e36289..7c8d17ef7 100644 --- a/docs/layers/monitoring/design-decisions/design-decisions.mdx +++ b/docs/layers/monitoring/design-decisions/design-decisions.mdx @@ -2,12 +2,16 @@ title: Design Decisions sidebar_label: Review Design Decisions sidebar_position: 1 +tags: + - design-decision --- -import DocCardList from '@theme/DocCardList'; -import Intro from '@site/src/components/Intro'; + +import DocCardList from "@theme/DocCardList"; +import Intro from "@site/src/components/Intro"; -Review the key design decisions for how you'll gather telemetry and logs for your applications. + Review the key design decisions for how you'll gather telemetry and logs for + your applications. - + diff --git a/docs/layers/network/design-decisions/decide-on-aws-account-vpc-subnet-cidr-strategy.mdx b/docs/layers/network/design-decisions/decide-on-aws-account-vpc-subnet-cidr-strategy.mdx index 411497807..9da753d68 100644 --- a/docs/layers/network/design-decisions/decide-on-aws-account-vpc-subnet-cidr-strategy.mdx +++ b/docs/layers/network/design-decisions/decide-on-aws-account-vpc-subnet-cidr-strategy.mdx @@ -3,9 +3,12 @@ title: "Decide on AWS Account VPC Subnet CIDR Strategy" sidebar_label: "VPC Subnet CIDR Strategy" refarch_id: REFARCH-217 description: "Decide the VPC CIDR ranges for AWS accounts" +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Problem diff --git a/docs/layers/network/design-decisions/decide-on-cidr-allocation.mdx b/docs/layers/network/design-decisions/decide-on-cidr-allocation.mdx index 72723a0da..1b8168de6 100644 --- a/docs/layers/network/design-decisions/decide-on-cidr-allocation.mdx +++ 
b/docs/layers/network/design-decisions/decide-on-cidr-allocation.mdx @@ -2,11 +2,22 @@ title: Decide on CIDR Allocations sidebar_label: CIDR Allocations description: Decide on CIDR blocks for VPCs and Subnets +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; Please also read the [design decision](/layers/network/design-decisions/decide-on-aws-account-vpc-subnet-cidr-strategy) for more information. - + diff --git a/docs/layers/network/design-decisions/decide-on-client-vpn-options.mdx b/docs/layers/network/design-decisions/decide-on-client-vpn-options.mdx index 06d533b01..bdcb33c98 100644 --- a/docs/layers/network/design-decisions/decide-on-client-vpn-options.mdx +++ b/docs/layers/network/design-decisions/decide-on-client-vpn-options.mdx @@ -3,9 +3,12 @@ title: "Decide on Client VPN Options" sidebar_label: "Client VPN Options" refarch_id: REFARCH-517 description: Decide how to use AWS Client VPNs +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Context and Problem Statement diff --git a/docs/layers/network/design-decisions/decide-on-hostname-scheme-for-service-discovery.mdx b/docs/layers/network/design-decisions/decide-on-hostname-scheme-for-service-discovery.mdx index 4bbc47d9b..4e1961e1a 100644 --- a/docs/layers/network/design-decisions/decide-on-hostname-scheme-for-service-discovery.mdx +++ b/docs/layers/network/design-decisions/decide-on-hostname-scheme-for-service-discovery.mdx @@ -3,9 +3,12 @@ title: "Decide on Hostname Scheme for Service Discovery" sidebar_label: "Hostname Format" refarch_id: REFARCH-208 description: "Decide the hostname format for service discovery" +tags: 
+ - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ### Context and Problem Statement diff --git a/docs/layers/network/design-decisions/decide-on-how-to-support-tls.mdx b/docs/layers/network/design-decisions/decide-on-how-to-support-tls.mdx index 201275cad..2e00d16be 100644 --- a/docs/layers/network/design-decisions/decide-on-how-to-support-tls.mdx +++ b/docs/layers/network/design-decisions/decide-on-how-to-support-tls.mdx @@ -3,9 +3,12 @@ title: "Decide on How to Support TLS" sidebar_label: TLS Implementation refarch_id: REFARCH-494 description: Decide on how to support TLS in your environment +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Context and Problem Statement diff --git a/docs/layers/network/design-decisions/decide-on-ipv4-and-ipv6-support.mdx b/docs/layers/network/design-decisions/decide-on-ipv4-and-ipv6-support.mdx index 5a01b2b2f..7bf1b8fdd 100644 --- a/docs/layers/network/design-decisions/decide-on-ipv4-and-ipv6-support.mdx +++ b/docs/layers/network/design-decisions/decide-on-ipv4-and-ipv6-support.mdx @@ -3,9 +3,12 @@ title: "Decide on IPv4 and IPv6 support" sidebar_label: "IPv4 and IPv6" refarch_id: REFARCH-541 description: Decide whether to support both IPv4 and IPv6 +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Context and Problem Statement diff --git a/docs/layers/network/design-decisions/decide-on-opting-into-non-default-regions.mdx 
b/docs/layers/network/design-decisions/decide-on-opting-into-non-default-regions.mdx index b1de32ca1..27fdedeb2 100644 --- a/docs/layers/network/design-decisions/decide-on-opting-into-non-default-regions.mdx +++ b/docs/layers/network/design-decisions/decide-on-opting-into-non-default-regions.mdx @@ -3,9 +3,12 @@ title: "Decide on Opting Into Non-default Regions" sidebar_label: "Non-default Regions" refarch_id: REFARCH-392 description: Decide which non-default AWS regions to enable +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; If a Region is disabled by default, you must enable it before you can create and manage resources. It would be a pre-requisite to deploying anything in the region. diff --git a/docs/layers/network/design-decisions/decide-on-organization-supernet-cidr-ranges.mdx b/docs/layers/network/design-decisions/decide-on-organization-supernet-cidr-ranges.mdx index 8489734ba..3f53914cd 100644 --- a/docs/layers/network/design-decisions/decide-on-organization-supernet-cidr-ranges.mdx +++ b/docs/layers/network/design-decisions/decide-on-organization-supernet-cidr-ranges.mdx @@ -3,9 +3,12 @@ title: "Decide on Organization Supernet CIDR Ranges" sidebar_label: "Org Supernet CIDRs" refarch_id: REFARCH-240 description: "Choose the all-encompassing CIDR for the AWS organization" +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Problem diff --git a/docs/layers/network/design-decisions/decide-on-primary-aws-region.mdx b/docs/layers/network/design-decisions/decide-on-primary-aws-region.mdx index 6ad1368de..fe1e18af0 100644 --- 
a/docs/layers/network/design-decisions/decide-on-primary-aws-region.mdx +++ b/docs/layers/network/design-decisions/decide-on-primary-aws-region.mdx @@ -3,9 +3,12 @@ title: "Decide on Primary AWS Region" sidebar_label: "Primary AWS Region" refarch_id: REFARCH-56 description: Pick the primary AWS region to use for the company +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; While the company might operate in multiple regions, one region should be selected as the primary region. There are certain resources that will not be geographically distributed and these should be provisioned in this default region. diff --git a/docs/layers/network/design-decisions/decide-on-service-discovery-domain.mdx b/docs/layers/network/design-decisions/decide-on-service-discovery-domain.mdx index a6a819b79..291eba9e6 100644 --- a/docs/layers/network/design-decisions/decide-on-service-discovery-domain.mdx +++ b/docs/layers/network/design-decisions/decide-on-service-discovery-domain.mdx @@ -3,10 +3,13 @@ title: "Decide on Service Discovery Domain" sidebar_label: "Service Discovery Domain" refarch_id: REFARCH-46 description: Decide the TLD to use for service discovery +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; -import ReactPlayer from 'react-player' + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; +import ReactPlayer from "react-player"; It's important to distinguish between branded/vanity domains (e.g. `cloudposse.com`, `slack.cloudposse.com`) used by customers and your infrastructure service discovery domains (e.g. `cloudposse.net`) used by services or internal consumers. 
For example, a product might have dozens of branded domains for SEO and marketing purposes, but you'll only have one infrastructure powering it. The service discovery domain is only for internal consumption. We get to define the conventions for this, not marketing. 😉 The service discovery domain will always be hosted on Route53, while the vanity domain can be hosted anywhere. @@ -145,7 +148,12 @@ We think this is overkill and instead, recommend the dedicated TLD per AWS Organ [Decide on Hostname Scheme for Service Discovery](/layers/network/design-decisions/decide-on-hostname-scheme-for-service-discovery) leveraging delegated zones by account. - + ## Related @@ -154,4 +162,3 @@ leveraging delegated zones by account. - [Decide on Vanity (Branded) Domain](/layers/network/design-decisions/decide-on-vanity-branded-domain) - [https://youtu.be/ao-2mfA5OTE](https://youtu.be/ao-2mfA5OTE) - diff --git a/docs/layers/network/design-decisions/decide-on-transit-gateway-requirements.mdx b/docs/layers/network/design-decisions/decide-on-transit-gateway-requirements.mdx index 30b66b206..d5e75db23 100644 --- a/docs/layers/network/design-decisions/decide-on-transit-gateway-requirements.mdx +++ b/docs/layers/network/design-decisions/decide-on-transit-gateway-requirements.mdx @@ -3,9 +3,12 @@ title: "Decide on Transit Gateway Requirements" sidebar_label: "Transit Gateways" refarch_id: REFARCH-487 description: Decide how AWS Transit Gateway will be used +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Context and Problem Statement diff --git a/docs/layers/network/design-decisions/decide-on-vanity-branded-domain.mdx b/docs/layers/network/design-decisions/decide-on-vanity-branded-domain.mdx index 2eff5d8ac..a2cb2ff6b 100644 --- 
a/docs/layers/network/design-decisions/decide-on-vanity-branded-domain.mdx +++ b/docs/layers/network/design-decisions/decide-on-vanity-branded-domain.mdx @@ -3,10 +3,13 @@ title: "Decide on Vanity (Branded) Domains" sidebar_label: "Vanity (Branded) Domains" refarch_id: REFARCH-54 description: Decide the vanity domains for your environments +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; -import ReactPlayer from 'react-player' + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; +import ReactPlayer from "react-player"; ## Problem @@ -60,7 +63,12 @@ will be using. ::: - + ## FAQ diff --git a/docs/layers/network/design-decisions/decide-on-vpc-nat-strategy.mdx b/docs/layers/network/design-decisions/decide-on-vpc-nat-strategy.mdx index 5f567581a..051405eb5 100644 --- a/docs/layers/network/design-decisions/decide-on-vpc-nat-strategy.mdx +++ b/docs/layers/network/design-decisions/decide-on-vpc-nat-strategy.mdx @@ -3,9 +3,12 @@ title: "Decide on VPC NAT Strategy" sidebar_label: "VPC NAT Strategy" refarch_id: REFARCH-523 description: Decide how to NAT traffic in your VPCs +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; **DRAFT** @@ -20,7 +23,8 @@ originates from a specific set of IPs ::: -
+ +
#### Pros @@ -37,12 +41,14 @@ originates from a specific set of IPs ### Option 2 - One VPC per Region, Per Platform Account with Centralized NAT Gateways per AZ in Network Account -
+ +
The Compliant Framework for Federal and DoD Workloads in AWS GovCloud (US) advocates for a strategy like this, whereby in the Network (transit) account, there will be a DMZ with a Firewall. -
+ +
#### Pros diff --git a/docs/layers/network/design-decisions/decide-on-vpc-network-traffic-isolation-policy.mdx b/docs/layers/network/design-decisions/decide-on-vpc-network-traffic-isolation-policy.mdx index 8bb47ff6e..553013166 100644 --- a/docs/layers/network/design-decisions/decide-on-vpc-network-traffic-isolation-policy.mdx +++ b/docs/layers/network/design-decisions/decide-on-vpc-network-traffic-isolation-policy.mdx @@ -3,9 +3,12 @@ title: "Decide on VPC Network Traffic Isolation Policy" sidebar_label: "VPC Traffic Isolation" refarch_id: REFARCH-524 description: Decide how network traffic is isolated with VPCs +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Context and Problem Statement diff --git a/docs/layers/network/design-decisions/decide-vpc-peering-requirements-e-g-to-legacy-env.mdx b/docs/layers/network/design-decisions/decide-vpc-peering-requirements-e-g-to-legacy-env.mdx index 78badd5ca..2a5f61b0e 100644 --- a/docs/layers/network/design-decisions/decide-vpc-peering-requirements-e-g-to-legacy-env.mdx +++ b/docs/layers/network/design-decisions/decide-vpc-peering-requirements-e-g-to-legacy-env.mdx @@ -3,9 +3,12 @@ title: "Decide on VPC Peering Requirements (e.g. 
to Legacy Env)" sidebar_label: "VPC Peering Requirements" refarch_id: REFARCH-80 description: Decide how to connect VPCs in different accounts +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Context and Problem Statement diff --git a/docs/layers/network/design-decisions/design-decisions.mdx b/docs/layers/network/design-decisions/design-decisions.mdx index f68e99562..ef22d6db8 100644 --- a/docs/layers/network/design-decisions/design-decisions.mdx +++ b/docs/layers/network/design-decisions/design-decisions.mdx @@ -2,12 +2,16 @@ title: Review Design Decisions sidebar_label: Review Design Decisions sidebar_position: 1 +tags: + - design-decision --- -import DocCardList from '@theme/DocCardList'; -import Intro from '@site/src/components/Intro'; + +import DocCardList from "@theme/DocCardList"; +import Intro from "@site/src/components/Intro"; - Review the key design decisions for how you'll implement the network and DNS layer of your infrastructure. + Review the key design decisions for how you'll implement the network and DNS + layer of your infrastructure. 
- + diff --git a/docs/layers/project/design-decisions/decide-on-1password-strategy.mdx b/docs/layers/project/design-decisions/decide-on-1password-strategy.mdx index c3e201d43..7f57ef884 100644 --- a/docs/layers/project/design-decisions/decide-on-1password-strategy.mdx +++ b/docs/layers/project/design-decisions/decide-on-1password-strategy.mdx @@ -4,12 +4,19 @@ sidebar_label: "1Password Strategy" sidebar_position: 4 refarch_id: REFARCH-34 description: "Review password strategy for engagements with Cloud Posse" +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; -We need to determine the best strategy for using 1Password to securely share sensitive information, such as passwords and integration keys, with individuals and teams during engagements with Cloud Posse. This decision aims to ensure a secure and efficient method for exchanging secrets while considering compatibility with AWS root account credentials. + We need to determine the best strategy for using 1Password to securely share + sensitive information, such as passwords and integration keys, with + individuals and teams during engagements with Cloud Posse. This decision aims + to ensure a secure and efficient method for exchanging secrets while + considering compatibility with AWS root account credentials. 
## Problem diff --git a/docs/layers/project/design-decisions/decide-on-ecr-strategy.mdx b/docs/layers/project/design-decisions/decide-on-ecr-strategy.mdx index 7de436e2e..8d8c51b55 100644 --- a/docs/layers/project/design-decisions/decide-on-ecr-strategy.mdx +++ b/docs/layers/project/design-decisions/decide-on-ecr-strategy.mdx @@ -4,9 +4,12 @@ sidebar_label: "ECR Strategy" sidebar_position: 6 refarch_id: REFARCH-254 description: "Decide how you'll use ECR for storing docker images" +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; This decision assumes that per the previous design decision, we’ll be using ECR to store docker images. diff --git a/docs/layers/project/design-decisions/decide-on-infrastructure-repository-name.mdx b/docs/layers/project/design-decisions/decide-on-infrastructure-repository-name.mdx index 9cf78f5b1..0938de23b 100644 --- a/docs/layers/project/design-decisions/decide-on-infrastructure-repository-name.mdx +++ b/docs/layers/project/design-decisions/decide-on-infrastructure-repository-name.mdx @@ -4,9 +4,12 @@ sidebar_label: "Repository Name" sidebar_position: 3 refarch_id: REFARCH-52 description: "Decide on where to keep your infrastructure code" +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; We highly recommend using a mono-repo for your foundational infrastructure. This doesn’t preclude introducing other infrastructure repositories in the future. 
diff --git a/docs/layers/project/design-decisions/decide-on-namespace-abbreviation.mdx b/docs/layers/project/design-decisions/decide-on-namespace-abbreviation.mdx index 7e8310e39..fb01faf7d 100644 --- a/docs/layers/project/design-decisions/decide-on-namespace-abbreviation.mdx +++ b/docs/layers/project/design-decisions/decide-on-namespace-abbreviation.mdx @@ -4,9 +4,12 @@ sidebar_label: "Namespace Abbreviation" sidebar_position: 2 refarch_id: REFARCH-53 description: "Pick a concise prefix for all cloud resource names" +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; Using a common prefix for all resource names will help establish a consistent naming convention. Certain resources in AWS are globally unique (e.g. for all customers). In order to maintain an (optimistically) unique naming convention, @@ -26,10 +29,8 @@ names.
Intel
`intl`
-
Google
`ggl`
-
Cloud Posse
`cpco`
diff --git a/docs/layers/project/design-decisions/decide-on-regional-naming-scheme.mdx b/docs/layers/project/design-decisions/decide-on-regional-naming-scheme.mdx index b3fd152c1..c7029790a 100644 --- a/docs/layers/project/design-decisions/decide-on-regional-naming-scheme.mdx +++ b/docs/layers/project/design-decisions/decide-on-regional-naming-scheme.mdx @@ -4,23 +4,27 @@ sidebar_label: "Regional Naming Scheme" sidebar_position: 7 refarch_id: REFARCH-209 description: "Decide on a regional naming scheme for resources" +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; -We need to decide how we’ll handle DR if that’s a requirement. It has far-reaching implications on naming conventions -and is not an easily reversible decision. + We need to decide how we’ll handle DR if that’s a requirement. It has + far-reaching implications on naming conventions and is not an easily + reversible decision. 
Our current best practice is to use the following convention: -| **Field** | **Description** | **Example** | -| ------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| `namespace` | Something short that uniquely identifies your organization. This relates to [Decide on Namespace Abbreviation](/layers/project/design-decisions/decide-on-namespace-abbreviation) | `cpco`

`eg` | -| `tenant` | | | +| **Field** | **Description** | **Example** | +| ------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `namespace` | Something short that uniquely identifies your organization. This relates to [Decide on Namespace Abbreviation](/layers/project/design-decisions/decide-on-namespace-abbreviation) | `cpco`

`eg` | +| `tenant` | | | | `environment`
(aka abbreviated region) | Environment indicates which AWS region the resource is in, using one of 2 sets of abbreviations. We use `gbl` for resources that are not specific to any region, such as IAM Roles.

You have a choice of 2 sets of abbreviations: `fixed` or `short`.

The `fixed` abbreviations are

- exactly 3 letters


- short and consistent so lists stay aligned on semantic boundaries


- The drawback is that AWS regions have collisions when algorithmically reduced to 3 letters, so some regions (particularly in Asia) have non-obvious abbreviations


The `short` abbreviations are

- 4 or more letters


- easier to understand


- usually identical to the prefix AWS uses for Availability Zone IDs in the region


- The drawback is that there are 1 or more additional characters, which can bring names closer to maximum character limits (e.g. target groups have a max of 32 characters)


We recommend using the `short` abbreviations, which more closely match the canonical zone IDs used by AWS.

See [AWS Region Codes](/resources/adrs/adopted/use-aws-region-codes/#region-codes) for the full breakdown. | AWS region code → fixed abbreviation (3 letter) → short abbreviation (4 letter+)

`us-east-1` → `ue1` → `use1`

`us-west-2` → `uw2` → `usw2`

`eu-west-3` → `ew3` → `euw3`

`ap-south-1` → `as0` → `aps1`

`af-south-1` → `fs1` → `afs1`

`cn-north-1` → `nn0` → `cnn1`

`us-gov-west-1` → `gw1` → `usgw1` | -| `stage`
(aka account) | The stage is where the resources operate. Our convention is to isolate every stage in a dedicated AWS member account (aka flavor), which is why we frequently call accounts stages. | `prod`, `at`, `network` | +| `stage`
(aka account) | The stage is where the resources operate. Our convention is to isolate every stage in a dedicated AWS member account (aka flavor), which is why we frequently call accounts stages. | `prod`, `at`, `network` | These field names correspond to the variable inputs of the `terraform-null-label` ([https://github.com/cloudposse/terraform-null-label](https://github.com/cloudposse/terraform-null-label)) used diff --git a/docs/layers/project/design-decisions/decide-on-secrets-management-placement.mdx b/docs/layers/project/design-decisions/decide-on-secrets-management-placement.mdx index 454e69f70..007674bf1 100644 --- a/docs/layers/project/design-decisions/decide-on-secrets-management-placement.mdx +++ b/docs/layers/project/design-decisions/decide-on-secrets-management-placement.mdx @@ -4,11 +4,15 @@ sidebar_label: "Secrets Placement" sidebar_position: 5 refarch_id: REFARCH-81 description: "Decide where to store secrets used by Terraform" +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; + +import Intro from "@site/src/components/Intro"; -We need to decide where to store secrets used by Terraform. We have two options: store secrets in each account or store them in a centralized account. + We need to decide where to store secrets used by Terraform. We have two + options: store secrets in each account or store them in a centralized account. 
## Context @@ -52,4 +56,3 @@ We will use AWS SSM Parameter Store for all platform-level secrets used by `infr ## Related - [Decide on Secrets Strategy for Terraform](/layers/project/design-decisions/decide-on-secrets-management-strategy-for-terraform/) - diff --git a/docs/layers/project/design-decisions/decide-on-secrets-management-strategy-for-terraform.mdx b/docs/layers/project/design-decisions/decide-on-secrets-management-strategy-for-terraform.mdx index 1d5894ca1..5e73e324f 100644 --- a/docs/layers/project/design-decisions/decide-on-secrets-management-strategy-for-terraform.mdx +++ b/docs/layers/project/design-decisions/decide-on-secrets-management-strategy-for-terraform.mdx @@ -4,12 +4,18 @@ sidebar_label: "Secrets Management" sidebar_position: 5 refarch_id: REFARCH-81 description: "Decide how to manage secrets used by Terraform" +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; -Deciding how to store secrets is crucial for securing both platform integration and application data when using Terraform. The appropriate secret store depends on the stack layer and must account for situations where other infrastructure might not yet be in place (e.g. Vault, Kubernetes, etc). + Deciding how to store secrets is crucial for securing both platform + integration and application data when using Terraform. The appropriate secret + store depends on the stack layer and must account for situations where other + infrastructure might not yet be in place (e.g. Vault, Kubernetes, etc). We need to decide where secrets will be kept. We’ll need to be able to securely store platform integration secrets (e.g. master keys for RDS, HashiCorp Vault unseal keys, etc) as well as application secrets (any secure customer data). 
diff --git a/docs/layers/project/design-decisions/decide-on-terraform-version.mdx b/docs/layers/project/design-decisions/decide-on-terraform-version.mdx index 154c25ae8..4bc48a71e 100644 --- a/docs/layers/project/design-decisions/decide-on-terraform-version.mdx +++ b/docs/layers/project/design-decisions/decide-on-terraform-version.mdx @@ -4,31 +4,111 @@ sidebar_label: "Terraform Version" sidebar_position: 1 refarch_id: REFARCH-280 description: "Choose Terraform version for compatibility and consistency" +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; -Different versions of Terraform have different features and compatibility, but all 1.x versions are backward compatible. We need to decide on the best Terraform version to use to avoid issues and maintain consistency across modules and components. + Different versions of Terraform and OpenTofu offer varying features and + compatibility. Terraform 1.x versions maintain backward compatibility within + the series, providing stability for existing workflows. However, OpenTofu + offers a fully open-source alternative that aligns with Cloud Posse's values + and avoids potential legal risks introduced by Terraform's licensing changes. + To ensure consistency and compatibility across modules and components, Cloud + Posse recommends OpenTofu as the preferred choice for new projects and + workflows. +:::warning Disclaimer +The content of this document is provided for informational purposes only and should not be construed as legal advice. Cloud Posse is not qualified to provide legal counsel, and any decisions related to the use of Terraform under the Business Source License (BSL) should be reviewed by professional legal advisors. OpenTofu is recommended based on technical and operational considerations, not legal advice. 
+::: + +## Context + +Terraform is a popular infrastructure-as-code tool that allows you to define, provision, and manage cloud resources. Terraform is developed by HashiCorp. From inception to 1.5.7, all versions were permissively licensed under the OSI-approved MPL software license. All newer releases are available under the Business Source License (BSL). The BSL license imposes restrictions on the use of Terraform in certain scenarios, which may impact long-term use and compatibility with third-party tools and integrations. + +Subsequently, every major open-source OS distribution (e.g. [Debian](https://wiki.debian.org/DFSGLicenses#DFSG-compatible_Licenses), [Alpine](https://wiki.alpinelinux.org/wiki/Release_Notes_for_Alpine_3.19.0#HashiCorp_packages), [Homebrew](https://formulae.brew.sh/formula/terraform)) has removed Terraform from their registries due to the BSL license. [GitLab has also removed Terraform](https://docs.gitlab.com/ee/update/deprecations.html#deprecate-terraform-cicd-templates) from their CI/CD pipelines due to the BSL license. This has created a significant challenge for organizations that rely on Terraform for infrastructure automation. + +OpenTofu (previously named OpenTF) is a fork of Terraform 1.5.7 that was [accepted by the CNCF](https://www.linuxfoundation.org/press/announcing-opentofu) and is fully open-source under the MPL license. OpenTofu is designed to maintain compatibility with Terraform 1.x modules and components while providing a stable and open-source alternative to the BSL-licensed Terraform versions. + +:::important +Terraform providers are not affected by this change. They are independently licensed and can be used with any version of Terraform and OpenTofu. While HashiCorp maintains some providers, the vast majority are not maintained by HashiCorp. Most importantly, the [`terraform-provider-aws`](https://github.com/hashicorp/terraform-provider-aws/blob/main/LICENSE) remains under the MPL license. 
+::: + +### OpenTofu Supporters + +[![CNCF Landscape](https://img.shields.io/badge/CNCF%20Landscape-5699C6)](https://landscape.cncf.io/?item=provisioning--automation-configuration--opentofu) + +The project is backed by many organizations, including: + +- [CNCF](https://github.com/cncf/sandbox/issues/81) +- [Cloudflare](https://blog.cloudflare.com/expanding-our-support-for-oss-projects-with-project-alexandria/) +- [OpenStreetMap](https://twitter.com/OSM_Tech/status/1745147427324133501) +- [JetBrains](https://blog.jetbrains.com/idea/2024/11/intellij-idea-2024-3/) +- [Cisco](https://blogs.cisco.com/developer/open-tofu-providers) +- [Microsoft Azure](https://github.com/Azure/Azure-Verified-Modules/discussions/1512), [`microsoft/fabric`](https://github.com/opentofu/registry/issues/1004), [`terraform-provider-azapi`](https://github.com/opentofu/registry/issues/920) +- [VMware Tanzu](https://docs.vmware.com/en/Tanzu-Cloud-Service-Broker-for-AWS/1.10/csb-aws/GUID-index.html) +- Cloud Posse +- Mixpanel +- Buildkite +- ExpressVPN +- Allianz +- Harness +- Gruntwork +- Spacelift +- Env0 +- Digger +- Terrateam +- Terramate + +For the full list of supporters, see the [OpenTofu website](https://opentofu.org/supporters/). + ## Problem -Historically, Terraform versions pre-1.x were notoriously backwards incompatible. This changed with Terraform 1.x releases and backwards -compatibility is assured for all subsequent 1.x releases. Our terraform modules and components strive to be on the latest version, -but with hundreds of modules and components, there’s sometimes a delay before we get the chance to _verify_ support. +Historically, Terraform versions pre-1.x were notoriously backward incompatible. This changed with Terraform 1.x releases, where backward compatibility is assured for all subsequent 1.x releases. While Terraform provides a stable experience, its recent shift to the BSL license introduces considerations for certain use cases, integrations, and compliance. 
+ +OpenTofu is based on Terraform 1.5.7 (the last MPL-licensed version) and maintains compatibility with Terraform 1.x modules and continues to evolve as a fully open-source project under the stewardship of the CNCF. Cloud Posse modules and components are verified to work with OpenTofu as part of our test automation, but with hundreds of modules, there may be delays in verifying full support with every new release. + +OpenTofu has not been without controversy, with some organizations expressing concerns about the project's governance and sustainability. [HashiCorp sent a cease and desist](https://opentofu.org/blog/our-response-to-hashicorps-cease-and-desist/) to the project. However, the project has gained significant traction and support from the community, including key contributors from the original Terraform project. As a result, [its sandbox application to the CNCF is delayed](https://github.com/cncf/sandbox/issues/81#issuecomment-2331714515) (as of 2024-09-05). ## Considerations -:::tip +Using OpenTofu ensures compatibility with third-party tools and integrations that are no longer supported with BSL-licensed Terraform versions. Furthermore, OpenTofu aligns with Cloud Posse's commitment to open-source principles and avoids potential compatibility and operational risks associated with BSL-licensed software. -Cloud Posse recommends using the latest 1.x version of Terraform +Cloud Posse only supports MPL-licensed versions of Terraform (Terraform 1.5.7 or older), and all versions of OpenTofu. -::: +Terraform 1.x remains backward compatible within the major version, but its BSL license imposes restrictions that may impact long-term use. + +## Recommendation + +Cloud Posse recommends using the [latest OpenTofu release](https://github.com/opentofu/opentofu/releases) for all new projects and workflows. -Prior to terraform 1.x, the version of terraform was a big deal due to backward compatibility issues between minor -releases. 
+:::important Consult with Your Legal Team + +Cloud Posse cannot provide legal advice. Organizations should consult with their legal teams to understand the implications of the BSL license on their use of Terraform. + +- [HashiCorp BSL License](https://www.hashicorp.com/bsl) +- [HashiCorp BSL FAQ](https://www.hashicorp.com/bsl-faq) + +::: ## Latest Releases -- https://github.com/hashicorp/terraform/releases +- **OpenTofu**: [https://github.com/opentofu/opentofu/releases](https://github.com/opentofu/opentofu/releases) +- **Terraform**: [https://github.com/hashicorp/terraform/releases](https://github.com/hashicorp/terraform/releases) + +## References + +- Mozilla Public License (MPL) applies to HashiCorp Terraform Versions 1.5.7 and earlier: [https://www.mozilla.org/en-US/MPL/](https://www.mozilla.org/en-US/MPL/) +- Business Source License (BSL) applies to HashiCorp Terraform Versions 1.6.0 and later: [https://www.hashicorp.com/bsl](https://www.hashicorp.com/bsl) +- Announcement of Terraform 1.6.0 and BSL License: [https://www.hashicorp.com/blog/announcing-hashicorp-terraform-1-6](https://www.hashicorp.com/blog/announcing-hashicorp-terraform-1-6) +- OpenTofu Project: [https://opentofu.org/](https://opentofu.org/) +- [OpenTofu Announces General Availability](https://www.linuxfoundation.org/press/opentofu-announces-general-availability) 2024-01-10, and ready for production use. +- [OpenTofu FAQ](https://opentofu.org/faq/) +- [OpenTofu Migration Guide](https://opentofu.org/docs/intro/migration/) +- [Atmos OpenTofu Configuration](https://atmos.tools/core-concepts/projects/configuration/opentofu) +- [Spacelift OpenTofu Configuration with Atmos](https://atmos.tools/integrations/spacelift#opentofu-support) +- [Martin Atkins](https://spacelift.io/blog/two-million-and-three-things-to-celebrate-in-the-opentofu-community) - Former core contributor of HashiCorp Terraform is now a core contributor to OpenTofu. 
diff --git a/docs/layers/project/design-decisions/design-decisions.mdx b/docs/layers/project/design-decisions/design-decisions.mdx index dc00e643e..cdb337a87 100644 --- a/docs/layers/project/design-decisions/design-decisions.mdx +++ b/docs/layers/project/design-decisions/design-decisions.mdx @@ -2,12 +2,19 @@ title: Foundational Design Decisions sidebar_position: 1 sidebar_label: Review Design Decisions +tags: + - design-decision --- -import DocCardList from '@theme/DocCardList'; -import Intro from '@site/src/components/Intro'; + +import DocCardList from "@theme/DocCardList"; +import Intro from "@site/src/components/Intro"; -Before deploying any infrastructure, there are some fundamental design decisions of our architecture. As you get started, be aware of these foundational choices. In our reference architecture, we've made some default decisions for you, but you may want to customize these based on your specific needs. + Before deploying any infrastructure, there are some fundamental design + decisions of our architecture. As you get started, be aware of these + foundational choices. In our reference architecture, we've made some default + decisions for you, but you may want to customize these based on your specific + needs. 
### Review Design Decisions diff --git a/docs/layers/security-and-compliance/design-decisions/decide-on-infrastructure-software-static-analysis-tools.mdx b/docs/layers/security-and-compliance/design-decisions/decide-on-infrastructure-software-static-analysis-tools.mdx index 54052832d..7bef1383c 100644 --- a/docs/layers/security-and-compliance/design-decisions/decide-on-infrastructure-software-static-analysis-tools.mdx +++ b/docs/layers/security-and-compliance/design-decisions/decide-on-infrastructure-software-static-analysis-tools.mdx @@ -4,9 +4,12 @@ sidebar_label: "Static Analysis Tools" sidebar_position: 100 refarch_id: REFARCH-331 description: "Decide on Infrastructure & Software Static Analysis Tools" +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Infrastructure Considerations: (terraform, docker) @@ -31,5 +34,3 @@ import KeyPoints from '@site/src/components/KeyPoints'; - WhiteSource - JFrog - - diff --git a/docs/layers/security-and-compliance/design-decisions/decide-on-kubernetes-platform-compliance-strategy.mdx b/docs/layers/security-and-compliance/design-decisions/decide-on-kubernetes-platform-compliance-strategy.mdx index 66f2310a0..8d3ebac65 100644 --- a/docs/layers/security-and-compliance/design-decisions/decide-on-kubernetes-platform-compliance-strategy.mdx +++ b/docs/layers/security-and-compliance/design-decisions/decide-on-kubernetes-platform-compliance-strategy.mdx @@ -4,9 +4,12 @@ sidebar_label: "Kubernetes Compliance" sidebar_position: 100 refarch_id: REFARCH-343 description: "Decide on a strategy for CIS Compliance/hardening on EKS" +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from 
"@site/src/components/KeyPoints"; Decide on a strategy for CIS Compliance/hardening on EKS. @@ -19,5 +22,3 @@ Decide on a strategy for CIS Compliance/hardening on EKS. - [https://snyk.io/](https://snyk.io/) - [https://www.rapid7.com/](https://www.rapid7.com/) - - diff --git a/docs/layers/security-and-compliance/design-decisions/decide-on-log-retention-and-durability-architecture.mdx b/docs/layers/security-and-compliance/design-decisions/decide-on-log-retention-and-durability-architecture.mdx index 576819680..fef2a3c79 100644 --- a/docs/layers/security-and-compliance/design-decisions/decide-on-log-retention-and-durability-architecture.mdx +++ b/docs/layers/security-and-compliance/design-decisions/decide-on-log-retention-and-durability-architecture.mdx @@ -4,13 +4,16 @@ sidebar_label: "Log Retention" sidebar_position: 100 refarch_id: REFARCH-355 description: "Decide on log retention requirements" +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Problem -Not all logs were created equal. Some may contain PHI (Protected Health Information) or CHD (Card Holder Data) while others are simply HTTP request logs. Depending on regional jurisdiction (E.g. Europe), there can be other requirements (E.g. [GDPR on AWS](https://docs.aws.amazon.com/whitepapers/latest/navigating-gdpr-compliance/monitoring-and-logging.html)). +Not all logs were created equal. Some may contain PHI (Protected Health Information) or CHD (Card Holder Data) while others are simply HTTP request logs. Depending on regional jurisdiction (E.g. Europe), there can be other requirements (E.g. [GDPR on AWS](https://docs.aws.amazon.com/whitepapers/latest/navigating-gdpr-compliance/monitoring-and-logging.html)). We need to identify the log destinations to discuss how to handle them. 
@@ -67,5 +70,3 @@ For everything in scope, we need to address: - [https://www.pcidssguide.com/what-are-the-pci-dss-log-retention-requirements/](https://www.pcidssguide.com/what-are-the-pci-dss-log-retention-requirements/) - [https://aws.amazon.com/s3/storage-classes/glacier/](https://aws.amazon.com/s3/storage-classes/glacier/) - - diff --git a/docs/layers/security-and-compliance/design-decisions/decide-on-strategy-for-hardened-base-amis.mdx b/docs/layers/security-and-compliance/design-decisions/decide-on-strategy-for-hardened-base-amis.mdx index a8ce92fcf..cbfa2a22e 100644 --- a/docs/layers/security-and-compliance/design-decisions/decide-on-strategy-for-hardened-base-amis.mdx +++ b/docs/layers/security-and-compliance/design-decisions/decide-on-strategy-for-hardened-base-amis.mdx @@ -4,9 +4,12 @@ sidebar_label: "Hardened AMIs" sidebar_position: 100 refarch_id: REFARCH-345 description: "Decide on how to harden base AMIs" +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Problem @@ -26,14 +29,14 @@ We need a solution that covers both EKS (for customers using it) and for standal ### Use CIS or Not? -> CIS benchmarks are internationally recognized as security standards for defending IT systems and data against cyberattacks. Used by thousands of businesses, they offer prescriptive guidance for establishing a secure baseline configuration.The CIS Foundation is the most recognized industry standard for hardening OS images, however, they have not yet published the CIS standard for container-optimized OS. The traditional CIS benchmarks are for full-blown OSs with a different set of concerns that do not apply to a container-optimized OS. 
What CIS has defined are [the best practices for hardening EKS as a platform](https://aws.amazon.com/de/blogs/containers/introducing-cis-amazon-eks-benchmark/) and that standard is covered by `kube-bench`. So by running `kube-bench` on a cluster we would be able to validate if Bottlerocket meets the CIS standard for nodes managed by Kubernetes. While this is not the same as "certification", it might be good enough for benchmark compliance. +> CIS benchmarks are internationally recognized as security standards for defending IT systems and data against cyberattacks. Used by thousands of businesses, they offer prescriptive guidance for establishing a secure baseline configuration. The CIS Foundation is the most recognized industry standard for hardening OS images, however, they have not yet published the CIS standard for container-optimized OS. The traditional CIS benchmarks are for full-blown OSs with a different set of concerns that do not apply to a container-optimized OS. What CIS has defined are [the best practices for hardening EKS as a platform](https://aws.amazon.com/de/blogs/containers/introducing-cis-amazon-eks-benchmark/) and that standard is covered by `kube-bench`. So by running `kube-bench` on a cluster we would be able to validate if Bottlerocket meets the CIS standard for nodes managed by Kubernetes. While this is not the same as "certification", it might be good enough for benchmark compliance. ### Use Existing Hardened Image - AWS does not provide turnkey CIS-compliant base AMIs (third-party vendors only). 
- Bottlerocket is more secure but is still not _technically_ CIS-compliant out of the box -[https://github.com/bottlerocket-os/bottlerocket/issues/1297](https://github.com/bottlerocket-os/bottlerocket/issues/1297) + [https://github.com/bottlerocket-os/bottlerocket/issues/1297](https://github.com/bottlerocket-os/bottlerocket/issues/1297) - [https://docs.aws.amazon.com/eks/latest/userguide/eks-optimized-ami-bottlerocket.html](https://docs.aws.amazon.com/eks/latest/userguide/eks-optimized-ami-bottlerocket.html) @@ -56,5 +59,3 @@ With `cloud-init` we can patch the system at runtime. This has the benefit of no ### AWS Systems Manager Patch Manager With AWS Systems Manager can apply patch documents to running systems based on policies, but violates the principle of immutable infrastructure. - - diff --git a/docs/layers/security-and-compliance/design-decisions/decide-on-technical-benchmark-framework.mdx b/docs/layers/security-and-compliance/design-decisions/decide-on-technical-benchmark-framework.mdx index 52bc079f8..b0cebffd9 100644 --- a/docs/layers/security-and-compliance/design-decisions/decide-on-technical-benchmark-framework.mdx +++ b/docs/layers/security-and-compliance/design-decisions/decide-on-technical-benchmark-framework.mdx @@ -4,9 +4,12 @@ sidebar_label: "Compliance Framework" sidebar_position: 100 refarch_id: REFARCH-470 description: "Decide on a technical benchmark framework for compliance" +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Benchmark Considerations @@ -45,5 +48,3 @@ The Technical Benchmark Framework should satisfy the vast majority of requiremen ### Questions - Has the team already started mapping out any of SOC2 controls that would influence technical controls or configurations? 
- - diff --git a/docs/layers/security-and-compliance/design-decisions/decide-on-waf-requirements-strategy.mdx b/docs/layers/security-and-compliance/design-decisions/decide-on-waf-requirements-strategy.mdx index 110d349c8..f9d5d24ff 100644 --- a/docs/layers/security-and-compliance/design-decisions/decide-on-waf-requirements-strategy.mdx +++ b/docs/layers/security-and-compliance/design-decisions/decide-on-waf-requirements-strategy.mdx @@ -4,15 +4,19 @@ sidebar_label: "WAF Requirements" sidebar_position: 100 refarch_id: REFARCH-384 description: "Decide on WAF requirements and strategy" +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Problem ALBs have very limited capabilities to fend off attacks by themselves. Using Security Groups is not a scalable solution. [The number of inbound/outbound rules is limited to a max of 120 (60 ea)](https://docs.aws.amazon.com/vpc/latest/userguide/amazon-vpc-limits.html#vpc-limits-security-groups). To thwart any sort of Denial of Service (DoS) attack, more tools are required. Moreover, not all attacks are as easily identified as DoS attacks. Other threat vectors include SQL injection, XSS, etc. The older your applications, the more external dependencies you have, the greater the attack surface area. ## Solution + Deploy a Web Application Firewall (WAF) capable of performing Layer-7 inspection and mitigation. 
:::info @@ -22,7 +26,8 @@ Our recommendation is to deploy the AWS WAF with the AWS Managed Rules for the [ ::: ## Considerations -- ALB/NLB won’t provide TLS in-transit with nitro instances + +- ALB/NLB won’t provide TLS in-transit with nitro instances - AWS WAF only works with ALBs @@ -33,6 +38,5 @@ Our recommendation is to deploy the AWS WAF with the AWS Managed Rules for the [ Our recommendation is to use AWS WAF with ALB load balancers, then use AWS Nitro instances for e2e encryption inside EKS, and self-signed certs between the ALB and the pods. ## References -- [https://github.com/cloudposse/terraform-aws-waf](https://github.com/cloudposse/terraform-aws-waf) - +- [https://github.com/cloudposse/terraform-aws-waf](https://github.com/cloudposse/terraform-aws-waf) diff --git a/docs/layers/security-and-compliance/design-decisions/design-decisions.mdx b/docs/layers/security-and-compliance/design-decisions/design-decisions.mdx index df2675dbb..253401383 100644 --- a/docs/layers/security-and-compliance/design-decisions/design-decisions.mdx +++ b/docs/layers/security-and-compliance/design-decisions/design-decisions.mdx @@ -2,12 +2,16 @@ title: Design Decisions sidebar_label: Review Design Decisions sidebar_position: 1 +tags: + - design-decision --- -import DocCardList from '@theme/DocCardList'; -import Intro from '@site/src/components/Intro'; + +import DocCardList from "@theme/DocCardList"; +import Intro from "@site/src/components/Intro"; - Review the key design decisions for how you'll monitor for security and compliance by leveraging the full suite of AWS security-oriented services. + Review the key design decisions for how you'll monitor for security and + compliance by leveraging the full suite of AWS security-oriented services. 
- + diff --git a/docs/layers/software-delivery/design-decisions/decide-how-to-distribute-docker-images.mdx b/docs/layers/software-delivery/design-decisions/decide-how-to-distribute-docker-images.mdx index a5b456f8c..d0c2a6bee 100644 --- a/docs/layers/software-delivery/design-decisions/decide-how-to-distribute-docker-images.mdx +++ b/docs/layers/software-delivery/design-decisions/decide-how-to-distribute-docker-images.mdx @@ -4,12 +4,18 @@ sidebar_label: "Docker Registry" sidebar_position: 100 refarch_id: REFARCH-91 description: Choose the right method to distribute Docker images +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; -Choosing the right method to distribute Docker images is crucial for efficient deployment and management.There are various options, including AWS ECR, GitHub Container Registry, DockerHub, Artifactory/Nexus, and self-hosted registries, with multiple advantages and drawbacks. + Choosing the right method to distribute Docker images is crucial for efficient + deployment and management. There are various options, including AWS ECR, GitHub + Container Registry, DockerHub, Artifactory/Nexus, and self-hosted registries, + with multiple advantages and drawbacks. #### Use AWS ECR @@ -28,10 +34,6 @@ DockerHub is well suited for public images because it’s the default registry, This is more common for traditional artifact storage in Java shops. We don’t see this typically used with Docker, but it is supported. - - #### Self-hosted Registries (e.g. Quay, Docker Registry, etc) We don’t recommend this approach because, at the very least, we’ll need to use something else like ECR for bootstrapping. 
- - diff --git a/docs/layers/software-delivery/design-decisions/decide-on-argocd-architecture.mdx b/docs/layers/software-delivery/design-decisions/decide-on-argocd-architecture.mdx index abf22c36a..7b33eb99c 100644 --- a/docs/layers/software-delivery/design-decisions/decide-on-argocd-architecture.mdx +++ b/docs/layers/software-delivery/design-decisions/decide-on-argocd-architecture.mdx @@ -4,572 +4,67 @@ sidebar_label: "Argo CD Architecture" sidebar_position: 100 refarch_id: REFARCH-420 description: Considerations for deploying Argo CD +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; -Deciding on the architecture for Argo CD involves considering multiple clusters, plugin management, and Kubernetes integration. We present some recommended strategies and considerations for deploying Argo CD, addressing potential risks, and detailing common deployment patterns. + Deciding on the architecture for Argo CD involves considering multiple + clusters, plugin management, and Kubernetes integration. This document + presents recommended strategies, potential risks, and common deployment + patterns for Argo CD. -## Considerations +## Context -- Multiple Argo CD clusters should be used to provide a means to systematically upgrade Argo CD in different environments. +Argo CD is a specialized tool for continuous delivery to Kubernetes, akin to how Terraform Cloud focuses on Terraform deployments. Argo CD does not support deployments outside of Kubernetes (e.g., uploading files to a bucket). While it supports plugins, these are not intended to extend its capabilities beyond Kubernetes. -- Argo CD runs as a single pod, and requires disruptive restarts to add or upgrade plugins. 
+## Considerations +- Deploy multiple Argo CD instances across clusters to facilitate systematic upgrades in different environments. +- Argo CD operates as a single pod, requiring disruptive restarts to add or upgrade plugins. - Restarts of Argo CD are disruptive to deployments. - -- The more Argo CD servers, the harder it is to visualize the delivery process. - -- Each Argo CD server must be integrated with each cluster it deploys to. - -- Argo CD can automatically deploy to the local cluster by installing a service account. - -Our recommendation is to deploy one Argo CD per cluster. - -## Introduction - -Argo CD is a tool designed specifically for continuous delivery to Kubernetes. It is similar to specialized platforms like Terraform Cloud, which focuses on deploying with Terraform. Argo CD does not support deployments outside of Kubernetes, such as uploading files to a bucket. While it does support plugins, these plugins are not intended to extend its deployment capabilities beyond Kubernetes. - -Two forms of escape hatches exist for slight deviations such as deployments involving Kubernetes-adjacent tooling like Helm, Kustomize, etc. - -1. Using Argo CD Config Plugins that shell-out and generate kubernetes manifests - -2. Using the Operator Pattern to deploy Custom Resources that perform some operation - -## Risks - -While the Operator Pattern is _ideal_ in theory, the reality is less than ideal: - -- Operators are frequently abandoned, or not regularly maintained (most are lucky to achieve traction) - -- Most operators are in alpha state, seemingly created as pet-projects to scratch an itch - -- Upgrading operators is non-trivial because you cannot have 2 versions deployed at the same time - -- Operators don’t automatically play well with other operators. For example, how would you pass a secret written from ExternalSecrets operator to a Terraform - -- When Operators fail, it might not break the pipeline. 
Debugging is also harder due to its asynchronous nature. - -## Use-Cases - -Here are some of the most common deployment patterns we see, and some ways in which those could be addressed: - -
- -1. **Deploy a** **generic application** **to Kubernetes** - - 1. Raw manifests are supported natively - - 2. Render Helm charts to manifests, then proceed as usual. - - 3. (Secrets and Secret Operators) Use ExternalSecrets Operator - -2. **Deploy** **generic Lambda** - - 1. Convert to Serverless Framework - - 2. Convert to Terraform - -3. **Deploy** **Serverless Framework Applications** - - 1. Serverless applications render to Cloudformation. See Cloudformation. - - 2. Wrap Cloudformation in a Custom Resource - -4. **Deploy Infrastructure with** **Cloudformation** - - 1. [https://github.com/linki/cloudformation-operator](https://github.com/linki/cloudformation-operator) - -5. **Deploy** **Single Page Application** **to S3** - - 1. This does not fit well into the - -6. **Deploy Infrastructure with** **Terraform** - - 1. Most operators are alpha. [https://github.com/hashicorp/terraform-k8s](https://github.com/hashicorp/terraform-k8s) -Something _feels_ wrong about deploying kubernetes with Terraform and then running Terraform inside of Kubernetes? - - 2. While it works, it’s a bit advanced to express. - -7. **Deploy** **Database Migrations** - - 1. Replicated (enterprise application delivery platform) maintains schemahero. Only supports DDL. -[https://github.com/schemahero/schemahero](https://github.com/schemahero/schemahero) - - 2. 
Standard kubernetes Jobs calling migration tool - -## Pros - -- Simplify dependency management across components (eventually, Argo CD will redeploy everything) - -- Protect KubeAPI from requiring public access (reduce attack surface) - -- Powerful CD tool for Kubernetes supporting multiple rollout strategies for Pods - -- Nice UI - -- Easy to use many kinds of deployment toolchains (that run in argo cd docker image) - -- Feels like deployments are faster - -- “Backup kubernetes cluster” out of the box - -- Consistent framework for how to do continuous deployment regardless of CI platform - -## Cons - -- Breaks the immediate feedback loop from commit to deployment -(as deployment with Argo CD are async) - -- Application CRD should be created in the namespace where argo cd is running - -- Applications name must be unique for Argo CD instance - -- Custom deployment toolchain (anything except kube resources/helm/kustomize/jsonnet) requires to build a custom docker image for argo cd and redeploy it. - -- Redeploying Argo CD is potentially a disruptive operation to running deployments -(like restarting Jenkins) and therefore must be planned. - -- Updating plugins requires re-deploying Argo CD since the tools must exist in the Argo CD docker image - -- Access management has an additional level - github repo access + argo cd projects + rbac. We can have `helm tiller` type of problem - -- Additional self-hosted solution ( while classic deploy step with helm 3 runs on CI and use only kubectl ) - -- Repository management (give access to private repos for argo cd) does not support declarative way (need research for ‘repo pattern’ workaround) - -- Argo CD is in the critical path of deployments and has it’s own SDLC. 
- -## Infrastructure - -### Create terraform-helm-argo cd module - -- Deploy Argo CD with Terraform so it will work well with Spacelift setup continuous delivery to Kubernetes - -- Use terraform-helm-provider - -- Use projects/terraform/argo cd/ (do not bundle with projects/terraform/eks/) -In [https://github.com/acme/infrastructure](https://github.com/acme/infrastructure) - -- Use spacelift to deploy/manage with GitOps - -- Use terraform-github-provider to create a deployment (e.g. deploy-prod) repository to manage Argo CD and manage all branch protections -(confirm with acme) - -### Create GitHub PAT for Managing Repos - -:::info -The scopes outlined in [the provider documentation](https://github.com/integrations/terraform-provider-github/blob/main/CONTRIBUTING.md#github-personal-access-token) covers the entirety of the provider's capabilities. When applying the principal of least-privileged, this component only interacts with a few repositories and their settings, therefore the PAT generated for this specific use-case should be limited to those actions and should not contain overly permissive scopes such as `admin:org`. - -::: - -The PAT needs to have the following permissions: - -| **Scope(s)** | **Purpose** | -| ------------------------------------------ | ----------------------------------------------------------------------------- | -| - repo | Create repository; Manage repository’s settings; Modify repository’s contents | -| - read:org


- read:discussion | Used to validate teams when setting up branch protections and team access | - -Once generated, the PAT should be stored in an SSM parameter (by default at `/argo cd/github/api_key`) where it will be retrieved and passed to the [integrations/github](https://registry.terraform.io/providers/integrations/github/latest) Terraform provider. - -### Create Repos for Deployment - -We’ll use 3 repos “`argo cd-deploy-prod`” and “`argo cd-deploy-non-prod`”, and “argo cd-deploy-preview” for release apps. - -Repos will be created manually due to GitHub organization permissions required. - -Repos will be managed terraform configure branch protection, using a bot user that has admin permissions to the repos. - -## Repo structure - -We’ll create 3 repos to start. - -- `acme/argo cd-deploy-prod/` - -- `acme/argo cd-deploy-non-prod/` - -- `acme/argo cd-deploy-preview/` (for release apps) - -By separating the preview environments from the other repos, we’re able to lifecycle it more frequently (e.g. resetting the commit history when it gets too large). We can also be more liberal about branch protections to enable continuous delivery without approvals. - -Argo CD will only deploy from the `main` branch. - -Here’s an example layout - -| | | | | -| ------------------------------ | ------------------------------------ | ------------------------ | ------------------------------------------------------------------------------------------------------------------ | -| **Repository** | **Cluster**
**(region & stage)** | **Kubernetes Namespace** | **Kubernetes Manifests** | -| `acme/argo cd-deploy-prd/` | `uw2-prd/*` | `argo cd/` | | -| | | `prd/` | `bazle-demo-frontend.yaml`
`bazle-demo-api.yaml`
`bazle-demo-db.yaml`
(output from helmfile template) | -| | `uw2-tools/*` | `argo cd/` | | -| | | `github-action-runners/` | | -| | `uw2-auto/*` | | | -| `acme/argo cd-deploy-non-prd/` | `uw2-stg/*` | `argo cd/` | | -| | | `stg/` | `bazle-demo-frontend.yaml`
`bazle-demo-api.yaml`
`bazle-demo-db.yaml`
(output from helmfile template) | -| | | `uat/` | | -| | `uw2-dev/*` | | | -| | `uw2-sbx/*` | | | -| `acme/argo cd-deploy-preview/` | `uw2-dev/*` | | | -| | | `pr2-example-app/` | `example.yaml` | - -### Setup Branch Protections - -#### Production - -We need to tightly control who can deploy to these environments. Using branch protections, no one (including bot users) can commit directly to the `main` branch. - -Pull Requests will be the only way to affect the main branch and will require approvals from teams defined by the `CODEOWNERS` and based on the cluster and namespace and status checks. - -Required status checks to merge the pull requests: - -1. Workflows in the non-production environment will post status indicating success or failure. In the example below, the commit has a successful `staging deployment (success)` check, enabling deployment to production. - -2. Workflows in the `acme/argo cd-deploy-prd/.github/workflows/` will do linting on the kubernetes manifests (where manifests exists) - -
- -Pull Requests will be predominantly machine generated by GitHub Actions. However, anyone can open a Pull Request (e.g for hotfixes), and with appropriate approvals get them deployed. - -#### Non-production - -The non-prod environments will work identically to production environments, with one exception. We’ll use the Mergify bot to automatically approve pull requests matching specific criteria, while supporting the same core functionality of production without the human overhead of acquiring approvals. - -#### Preview Environments - -These environments will not require Pull Requests, however, will restrict the main branch to commits from the `acmebot`. This will enable rapid continuous delivery for review apps. - -#### Deploy Argo CD with terraform-helm-argo cd module on each target cluster - -We’ll deploy Argo CD on each target cluster so it can easily manage in-cluster resources and we don’t need to worry about cross-cluster networking, permissions, roles, etc. - -#### Configuration - -##### Settings (non-sensitive) - -These come from the `example1/env/` folder and are defined by namespace. - -##### Secrets - -## Release Engineering Process - -This is our recommended process for continuous delivery of monorepos. - -Using Argo CD with helmfile provides limited utility for the following reasons: - -- We cannot use helmfile’s native ability to fetch secrets from ASM or SSM, because that will result in secrets getting committed to source control - -- We cannot use helmfile hooks, because those run at execution time of helmfile, and not during synchronization with Argo CD - -Instead, we can achieve similar results using native support for Helm in Argo CD and the External Secrets Operator. - -Inside each monorepo, define a charts folder that has all charts for this repository. - -- We recommend one chart for each type of app (e.g. c++ microservice), rather than one chart per app. 
- -- GitHub Action should validate the chart (lint check) for all Pull Requests - -### Preview Environments - -#### Developer adds `deploy` label to Pull Request - -GitHub Action Workflow runs on events “opened”, “reopened”, “synchronized” - -- Builds & pushes docker images - -- Checks out the main branch in the preview repo - -- Commits the follow custom resource and pushes to main branch - -File is generated by the GitHub Action based on `example1/app.yaml` - -File is written to `acme/argo cd-deploy-preview/uw2-stg/argo cd/pr2-example1.yaml` - -####### _Example 1: acme/__**argo cd-deploy-preview**__/uw2-stg/argo cd/pr2-example1.yaml (artifact)_ - -```yaml -# Example Argo CD Application (use built-in Helm) generated from the app.yaml -apiVersion: argoproj.io/v1alpha1 -kind: Application -metadata: - name: pr2-example1 - # This should always be argo cd (it’s not where the app will run) - namespace: argo cd - finalizers: - - resources-finalizer.argo cd.argoproj.io -spec: - source: - # Example of shared chart in the bazel-monorepo-demo - path: charts/monochart - repoURL: acme/bazel-monorepo-demo/ - targetRevision: {git-sha} - helm: - version: v3 - valueFiles: - - example1/env/default.yaml - - example1/env/preview.yaml - parameters: - - name: image - value: ecr/example1:{git-sha} -``` - -Example of the `acme/bazel-monorepo-demo/example1/env/default.yaml` - -```yaml title="acme/bazel-monorepo-demo/example1/env/default.yaml" -# This is a standard helm values file -# The defaults set common values for all releases -use_db: true -``` - -```yaml title="acme/bazel-monorepo-demo/example1/env/preview.yaml" -# This is a helm values file -# This is an example of overriding the defaults. It needs parameterization. -host: pr12-example1.acme.org -``` - -```yaml title="acme/bazel-monorepo-demo/example1/app.yaml" -# this is the configuration used by the GitHub action to build the Argo CD Application Manifest -chart: charts/monochart -``` - -GitHub Action Step: - -1. 
Looks at `app.yaml` (our own custom specification) to generate the Argo CD Application - -2. Produces `acme/argo cd-deploy-preview/uw2-stg/argo cd/pr2-example1.yaml` - -####### _Example 2_ - -Fundamentally similar to Example 1. The difference is that it bypasses the need for Helm. - -```yaml -# Example with Raw Kubernetes Manifests (alternative to helm strategy) -apiVersion: argoproj.io/v1alpha1 -kind: Application -metadata: - name: pr2-example1 - # This should always be argo cd (it’s not where the app will run) - namespace: argo cd - finalizers: - - resources-finalizer.argo cd.argoproj.io -spec: - destination: - namespace: pr2-example1 - project: default - source: - path: uw2-stage/pr2-example1/ - repoURL: acme/argo cd-deploy-preview/ - targetRevision: main ---- -# Deployment -# Service -# Ingress - -``` - -### Staging Releases - -The process always starts by cutting a GitHub release against the monorepo. - -This triggers the GitHub Action on release events. - -- Retag the artifacts (presumed to already exist) - -- Open Pull Request against `acme/argo cd-deploy-non-prd/` - -- Open Pull Request against `acme/argo cd-deploy-prd/` - -When respective pull requests are merged to main, subject to all branch protections, Argo CD kicks off the automatic deployment. - -### Production Releases - -Production releases are triggered by merging the respective PR, subject to all branch protections. - -The most notable branch protection is the requirement that the status check for the commit (corresponding to the release) has a passing “`staging deployment`” check. - -
- -## Demo - -Update `acme/bazel-monorepo-demo` with 3 or more examples (we’ll just clone the current one) to set up `example1/`, `example2/`, `example3/` services. The objective is to show how we can use shared charts as well as dedicated charts per service. - -Convert helmfile raw chart examples to sample charts in the `charts/` folder of the monorepo. - -Deprecate the helmfile.yaml example - -Update GitHub Actions workflow - -1. Produce the Argo CD Application manifest artifact from the app.yaml - -2. Open Pull Request against repo (for prd and non-prd) - -3. Push directly for preview environments - -Create `acme/argo cd-deploy-non-prd/.github/mergify.yml` for auto-merging PRs - -```yaml title="acme/argo cd-deploy-non-prd/.github/mergify.yml" -pull_request_rules: -- name: "approve automated PRs that have passed checks" - conditions: - - "check-success~=lint/kubernetes" - - "check-success~=lint/helm" - - "base=main" - - "author=acmebot" - - "head~=argo cd/.*" - actions: - review: - type: "APPROVE" - bot_account: "acmebot" - message: "We've automatically approved this PR because the checks from the automated Pull Request have passed." - -- name: "merge automated PRs when approved and tests pass" - conditions: - - "check-success~=lint/kubernetes" - - "check-success~=lint/helm" - - "base=main" - - "head~=argo cd/.*" - - "#approved-reviews-by>=1" - - "#changes-requested-reviews-by=0" - - "#commented-reviews-by=0" - - "base=master" - - "author=acmebot" - actions: - merge: - method: "squash" - -- name: "delete the head branch after merge" - conditions: - - "merged" - actions: - delete_head_branch: {} - -- name: "ask to resolve conflict" - conditions: - - "conflict" - actions: - comment: - message: "This pull request is now in conflict. Could you fix it @{{author}}? 
🙏" - -- name: "remove outdated reviews" - conditions: - - "base=main" - actions: - dismiss_reviews: - changes_requested: true - approved: true - message: "This Pull Request has been updated, so we're dismissing all reviews." - -``` - -Create the sample `acme/argo cd-deploy-non-prd/.github/CODEOWNER` - -```yaml title="acme/argo cd-deploy-non-prd/.github/mergify.yml" -uw2-stg/* @acme/staging @acme/mergebots - -``` - -Enable branch protections - -## Research - -- Feedback loop with Argo CD to update GitHub Status API - -- [https://github.com/argoproj-labs/argo-kube-notifier](https://github.com/argoproj-labs/argo-kube-notifier) - -```yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: argo cd-notifications-cm -data: - config.yaml: | - triggers: - # Define your custom trigger - - name: my-custom-trigger - condition: app.status.sync.status == 'Unsynced' && ( time.Parse(app.status.operationState.finishedAt) - time.Parse(app.status.operationState.startedAt) ) < 100 - template: my-custom-template - templates: - # Add your custom template - - name: my-custom-template - title: Hello {{.app.metadata.name}} - body: | - Application details: {{.context.argo cdUrl}}/applications/{{.app.metadata.name}}. -``` - -- [https://argo cd-notifications.readthedocs.io/en/stable/services/opsgenie/](https://argo cd-notifications.readthedocs.io/en/stable/services/opsgenie/) - -```yaml -apiVersion: argoproj.io/v1alpha1 -kind: Notification -metadata: - name: notification-1 -spec: - # Define resource need to be monitor. - monitorResource: - Group: "argoproj.io" - Resource: "application" - Version: "v1alpha1" - Namespace: default - notifiers: - - name: slack - slack: - hookUrlSecret: - name: my-slack-secret - key: hookURL - channel: testargonotification - hookurl: "https://hooks.slack.com" - rules: - - allConditions: - - jsonPath: "status/sync/status" - operator: "ne" - value: "Unsynced" - Events: - - message: "Condition Triggered : Deployment = {{.metadata.name}} replicaset does not match. 
Required Replicas = {{.status.replicas}}, Current Replicas={{.status.readyReplicas}}" - emailSubject: "Argo Notification Condition Triggered {{.metadata.name}}" - notificationLevel: "warning" - notifierNames: - - "slack" - name: rule1 - initialDelaySec: 60 - throttleMinutes: 5 - -``` - -- Can `deliverybot` simplify some of this process (including updating the GitHub Status API) - -- At the moment it looks like delivery bot will add complexity. As the only thing it is doing - triggering a `deployment` event for github actions that do all work. - -[https://deliverybot.dev/docs/guide/2-deploy-action/](https://deliverybot.dev/docs/guide/2-deploy-action/) - -[https://github.com/deliverybot/example-helm/blob/master/.github/workflows/cd.yml](https://github.com/deliverybot/example-helm/blob/master/.github/workflows/cd.yml) - -- Delivery bot could we useful if we get rid of pattern with `PR for prod approval` as it allow automate deployment created based on commit statuses [https://deliverybot.dev/docs/configuration/#targetrequired_contexts](https://deliverybot.dev/docs/configuration/#targetrequired_contexts) - -- Running delivery bot on prem with kubernetes is not documented[https://github.com/deliverybot/deliverybot/tree/master/packages/kubernetes](https://github.com/deliverybot/deliverybot/tree/master/packages/kubernetes) - -- SaaS delivery bot cost per user - -## Suggestions - -Common suggestions and practices to consider and discuss in the implementation of Argo CD solutions. - -### ALB and Ingress - -#### Overview - -- Argo CD is capable of deploying and updating resources of all types on a Kubernetes cluster (including RBAC and Ingress, etc.). Some extra consideration should be taken in the Architecture used per customer engagement. Where possible we should try to isolate the Argo CD API and UI from other application and service traffic. 
- -- The term `common` ALB/Ingress in this section refers to identifying an ingress by `group.name` where any ingress that references that same `group.name` applies its configurations to to the same alb resources. There is also a concept of weighting group based rules so it is possible for another ingress annotations to supersede already established configurations by setting a smaller number (higher priority) with `group.order`. - -- Previous load balancer patterns used with Cloud Posse engagements utilize a single, `common`, internet-facing ALB, and Ingress Groups ensure that all services will use this ALB and Ingress. Services such as Argo CD should probably not be internet-facing, even if they are backed by authentication. Maybe we need to extend this pattern to multiple `group.name` ALBs, for internet facing, public facing and any service that should be treated as a separate security or functional concern. - -- Ingress groups should not overlap different k8s RBAC permission boundaries (e.g. namespaces) because different users can override existing rules in the ingress group with higher priority rules. So maybe `common` ALBs should be per k8s namespace. - -#### References - -- [https://docs.aws.amazon.com/eks/latest/userguide/alb-ingress.html](https://docs.aws.amazon.com/eks/latest/userguide/alb-ingress.html) - -- [https://kubernetes-sigs.github.io/aws-load-balancer-controller/v2.2/guide/ingress/annotations/#ingressgroup](https://kubernetes-sigs.github.io/aws-load-balancer-controller/v2.2/guide/ingress/annotations/#ingressgroup) - - +- Increasing the number of Argo CD servers complicates visualizing the delivery process. +- Each Argo CD server must integrate with every cluster it deploys to. +- Argo CD can deploy to the local cluster by using a service account. + +### Pros + +- Simplifies dependency management across components. +- Protects the KubeAPI by reducing public access requirements. 
+- Provides a powerful CD tool for Kubernetes with multiple pod rollout strategies. +- Offers a user-friendly UI and supports diverse deployment toolchains within the Argo CD Docker image. +- Enables faster deployments and "backup Kubernetes cluster" capabilities. +- Establishes a consistent framework for continuous deployment independent of the CI platform. + +### Cons + +- Asynchronous deployments can break the immediate feedback loop from commit to deployment. +- Application CRDs must reside in the namespace where Argo CD runs. +- Application names must be unique per Argo CD instance. +- Custom toolchains require custom Docker images, necessitating Argo CD redeployment. +- Redeploying Argo CD can disrupt active deployments. +- Plugin updates require redeployment since tools must be included in the Docker image. +- Access management involves multiple levels (e.g., GitHub repo access, Argo CD projects, RBAC), introducing complexity. +- Requires additional self-hosted solutions compared to simpler CI-based deployments with Helm 3. +- Repository management for private repos in Argo CD lacks a declarative approach, needing research for potential patterns. +- Argo CD's lifecycle becomes part of the critical path for deployments. + +## Recommendations + +- **Deploy one Argo CD instance per cluster** to simplify upgrades and manage disruptions effectively. +- **Use a single Argo CD instance for all namespaces within a cluster** to centralize deployment management and reduce complexity. +- **Adopt a dedicated repository strategy** managed by Terraform via the GitHub Terraform provider: + - One repository for production environments. + - One repository for non-production environments. + - One repository for preview environments. +- **Avoid using plugins**: + - Commit raw manifests (e.g., rendered from Helm templates or Kustomize) directly to the repository. + - Shift manifest rendering to CI to ensure predictable, verifiable deployments. 
+ - This approach simplifies troubleshooting, avoids plugin upgrade issues, and ensures complete visibility into what is deployed. +- **Deploy operators that require IAM roles and backing services with Terraform**, not Argo CD, to ensure proper role management and infrastructure provisioning. +- **Use Argo CD for application deployments** in combination with GitHub Actions to streamline deployment pipelines and align with CI/CD best practices. +- **Use Helm to Provision Argo CD** with Terraform diff --git a/docs/layers/software-delivery/design-decisions/decide-on-argocd-deployment-repo-architecture.mdx b/docs/layers/software-delivery/design-decisions/decide-on-argocd-deployment-repo-architecture.mdx index e6ac67442..4107d0e85 100644 --- a/docs/layers/software-delivery/design-decisions/decide-on-argocd-deployment-repo-architecture.mdx +++ b/docs/layers/software-delivery/design-decisions/decide-on-argocd-deployment-repo-architecture.mdx @@ -4,9 +4,12 @@ sidebar_label: "ArgoCD Deployment Repos" sidebar_position: 100 refarch_id: REFARCH-405 description: Structure for Argo CD deployment repositories +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ### Context @@ -61,5 +64,3 @@ Our recommendation is ~3 repos, with multiple clusters in each: ## Related - [Decide on ArgoCD Architecture](/layers/software-delivery/design-decisions/decide-on-argocd-architecture) - - diff --git a/docs/layers/software-delivery/design-decisions/decide-on-branching-strategy.mdx b/docs/layers/software-delivery/design-decisions/decide-on-branching-strategy.mdx index c3042e825..281af2276 100644 --- a/docs/layers/software-delivery/design-decisions/decide-on-branching-strategy.mdx +++ b/docs/layers/software-delivery/design-decisions/decide-on-branching-strategy.mdx @@ -4,9 +4,12 @@ sidebar_label: "Branching 
Strategy" confluence: sidebar_position: 100 description: Branching strategies for your software delivery +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Context and Problem Statement @@ -35,7 +38,6 @@ code coverage and teams with a high percentage of junior devs all benefit from t However, Gitflow may not be suitable for startups where development speed is the priority. - ```mermaid --- title: Gitflow @@ -152,6 +154,7 @@ use different flows. However, that often leads to complexity in the CI/CD pipeli For these reasons, we recommend a consistent branching strategy, at a minimum on a team level. ## References -* [A successful Git branching model](https://nvie.com/posts/a-successful-git-branching-model/) -* [Trunk-Based Development vs Git Flow: When to Use Which Development Style](https://blog.mergify.com/trunk-based-development-vs-git-flow-when-to-use-which-development-style/) -* [Long-lived branches with Gitflow](https://www.thoughtworks.com/radar/techniques/long-lived-branches-with-gitflow) + +- [A successful Git branching model](https://nvie.com/posts/a-successful-git-branching-model/) +- [Trunk-Based Development vs Git Flow: When to Use Which Development Style](https://blog.mergify.com/trunk-based-development-vs-git-flow-when-to-use-which-development-style/) +- [Long-lived branches with Gitflow](https://www.thoughtworks.com/radar/techniques/long-lived-branches-with-gitflow) diff --git a/docs/layers/software-delivery/design-decisions/decide-on-customer-apps-for-migration.mdx b/docs/layers/software-delivery/design-decisions/decide-on-customer-apps-for-migration.mdx index eb2b95067..a35e2daf3 100644 --- a/docs/layers/software-delivery/design-decisions/decide-on-customer-apps-for-migration.mdx +++ 
b/docs/layers/software-delivery/design-decisions/decide-on-customer-apps-for-migration.mdx @@ -4,9 +4,12 @@ sidebar_label: "App Migration" sidebar_position: 100 refarch_id: REFARCH-256 description: Identify applications that should migrate to the new platform +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; We need to identify an application and its associated services that are suitable as the first candidate for migration to the new platform. This is an application that will be targeted by all the release engineering work. @@ -21,8 +24,6 @@ Apps that do not have these characteristics may require more engineering effort. Using any existing repository will pose a risk of triggering GitHub events (E.g. pull requests, releases, etc) that other existing CI/CD systems (e.g. Jenkins, CircleCI, etc) will respond to. Furthermore, several GitHub Action events only work on the default branch (e.g. `main`) and for this reason, we will need to merge to PRs to test the end-to-end process. For this reason, we recommend starting with a model application template repository that your team can use to document and train others on your CI/CD process. -Completing the migration workbook will help identify suitable applications. Our workbook template is here [https://docs.google.com/spreadsheets/d/1CDcJosaqoby2Fq2AmZnf-xRizI4pcc-sqpi04ggHqSI/edit#gid=863544204](https://docs.google.com/spreadsheets/d/1CDcJosaqoby2Fq2AmZnf-xRizI4pcc-sqpi04ggHqSI/edit#gid=863544204) and can be copied and shared. +Completing the migration workbook will help identify suitable applications. 
Our workbook template is here [https://docs.google.com/spreadsheets/d/1CDcJosaqoby2Fq2AmZnf-xRizI4pcc-sqpi04ggHqSI/edit#gid=863544204](https://docs.google.com/spreadsheets/d/1CDcJosaqoby2Fq2AmZnf-xRizI4pcc-sqpi04ggHqSI/edit#gid=863544204) and can be copied and shared. Our goal is to migrate a couple of apps within the allotted Sprint(s), however, we highly recommend leaving some for homework. - - diff --git a/docs/layers/software-delivery/design-decisions/decide-on-database-seeding-strategy-for-ephemeral-preview-enviro.mdx b/docs/layers/software-delivery/design-decisions/decide-on-database-seeding-strategy-for-ephemeral-preview-enviro.mdx index c8e97f46a..e2b2ed818 100644 --- a/docs/layers/software-delivery/design-decisions/decide-on-database-seeding-strategy-for-ephemeral-preview-enviro.mdx +++ b/docs/layers/software-delivery/design-decisions/decide-on-database-seeding-strategy-for-ephemeral-preview-enviro.mdx @@ -4,9 +4,12 @@ sidebar_label: "Database Seeding Strategy" sidebar_position: 100 refarch_id: REFARCH-144 description: What data should be used in preview environments +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Problem @@ -18,6 +21,7 @@ As a general best practice, we should never use production data in non-productio ::: ## Considerations + We prefer to include the DBA in these conversations. Suggested requirements: @@ -31,6 +35,7 @@ Suggested requirements: - They should have realistic data, so the environments are testing something closer to staging/production ## Considered Options + **Option 1:** Seed data (fixtures) - **recommended** - Most database migration tools support something like this (e.g. `rake db:fixtures:load`) @@ -58,5 +63,3 @@ Suggested requirements: **Option 5:** Dedicated cluster (not advised) - Too slow to launch (e.g. 
+30-40 minutes), expensive, complicated to implement - - diff --git a/docs/layers/software-delivery/design-decisions/decide-on-github-actions-workflow-organization-strategy.mdx b/docs/layers/software-delivery/design-decisions/decide-on-github-actions-workflow-organization-strategy.mdx index 3f4cced06..f0e099a0a 100644 --- a/docs/layers/software-delivery/design-decisions/decide-on-github-actions-workflow-organization-strategy.mdx +++ b/docs/layers/software-delivery/design-decisions/decide-on-github-actions-workflow-organization-strategy.mdx @@ -4,9 +4,12 @@ sidebar_label: "GitHub Actions Workflow Organization Strategy" sidebar_position: 100 refarch_id: REFARCH-421 description: Decide where GitHub Actions workflows are kept +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Problem @@ -78,15 +81,13 @@ deployment.newDeployment(JSON.parse(`${{ inputs.stages }}`)) - Use a private template repository to make it easy for developers to initialize new projects - Adjust `webhook_startup_timeout` in the chart. This setting is used for automatically scaling -back replicas. The recommended default is 30 minutes, but no one size fits all. Here's further -documentation for your consideration: [scaling runners](https://github.com/actions/actions-runner-controller/blob/master/docs/automatically-scaling-runners.md) + back replicas. The recommended default is 30 minutes, but no one size fits all. 
Here's further + documentation for your consideration: [scaling runners](https://github.com/actions/actions-runner-controller/blob/master/docs/automatically-scaling-runners.md) ## Related -- [Decide on Strategy for Continuous Integration](/layers/software-delivery/design-decisions/decide-on-strategy-for-continuous-integration) +- [Decide on Strategy for Continuous Integration](/layers/software-delivery/design-decisions/decide-on-strategy-for-continuous-integration) - [Decide on Self-Hosted GitHub Runner Strategy](/layers/software-delivery/design-decisions/decide-on-self-hosted-github-runner-strategy) - [GitHub Actions](/learn/tips-and-tricks/github-actions) - - diff --git a/docs/layers/software-delivery/design-decisions/decide-on-hot-fix-or-rollback-strategy.mdx b/docs/layers/software-delivery/design-decisions/decide-on-hot-fix-or-rollback-strategy.mdx index 99df6559f..4a3524662 100644 --- a/docs/layers/software-delivery/design-decisions/decide-on-hot-fix-or-rollback-strategy.mdx +++ b/docs/layers/software-delivery/design-decisions/decide-on-hot-fix-or-rollback-strategy.mdx @@ -4,9 +4,12 @@ sidebar_label: "Hot-fixes and Rollbacks" sidebar_position: 100 refarch_id: REFARCH-428 description: Decide how to revert changes +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; Releasing a change is when it’s made available to a user or subset of users. In an ideal world, deployments should not equal releases. 
@@ -27,5 +30,3 @@ If release branches are utilized, then any bug-fix commits need to be pushed to ## Related - [Decide on Release Promotion Strategy](/layers/software-delivery/design-decisions/decide-on-release-promotion-strategy) - - diff --git a/docs/layers/software-delivery/design-decisions/decide-on-how-ecs-apps-are-deployed.mdx b/docs/layers/software-delivery/design-decisions/decide-on-how-ecs-apps-are-deployed.mdx index aa9d46cde..35c9dc2ea 100644 --- a/docs/layers/software-delivery/design-decisions/decide-on-how-ecs-apps-are-deployed.mdx +++ b/docs/layers/software-delivery/design-decisions/decide-on-how-ecs-apps-are-deployed.mdx @@ -4,9 +4,12 @@ sidebar_label: "ECS App Deployment" sidebar_position: 100 refarch_id: REFARCH-411 description: Decide how to deploy applications to ECS +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; We need to decide on what methodology to use when deploying applications to ECS. 
@@ -27,5 +30,3 @@ Think of Helm Charts in Kubernetes as similar to using Terraform Modules for ECS - Auto deploy on merges - Auto deploy on manual cut releases - - diff --git a/docs/layers/software-delivery/design-decisions/decide-on-kubernetes-application-artifacts.mdx b/docs/layers/software-delivery/design-decisions/decide-on-kubernetes-application-artifacts.mdx index d4d556bcd..b29458377 100644 --- a/docs/layers/software-delivery/design-decisions/decide-on-kubernetes-application-artifacts.mdx +++ b/docs/layers/software-delivery/design-decisions/decide-on-kubernetes-application-artifacts.mdx @@ -4,9 +4,12 @@ sidebar_label: "Kubernetes Application Delivery" sidebar_position: 100 refarch_id: REFARCH-427 description: How to package and deploy applications to Kubernetes +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; We prefer strategies that ship the Application code (e.g. docker images) with the Application configuration (E.g. everything needed to run the application on the platform, such as manifests, IAM roles, etc.) @@ -27,5 +30,3 @@ We prefer strategies that ship the Application code (e.g. 
docker images) with th - Helm charts - Kustomize - - diff --git a/docs/layers/software-delivery/design-decisions/decide-on-maintenance-page-solution.mdx b/docs/layers/software-delivery/design-decisions/decide-on-maintenance-page-solution.mdx index 9ace5e96f..685a45101 100644 --- a/docs/layers/software-delivery/design-decisions/decide-on-maintenance-page-solution.mdx +++ b/docs/layers/software-delivery/design-decisions/decide-on-maintenance-page-solution.mdx @@ -4,13 +4,17 @@ sidebar_label: "Maintenance Page" sidebar_position: 100 refarch_id: REFARCH-112 description: Decide how to deliver maintenance pages +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Problem -
+ +
When your sites go down, we need to be able to communicate more graciously to customers that you’re having issues than a generic “502 Bad Gateway” message. @@ -21,7 +25,7 @@ Specifically, this decision relates to services behind an ALB. CloudFront and S3 ## Solution -We recommend deploying a static maintenance page. The industry best practice is to host the downtime page on a cloud provider that does not share infrastructure with your primary cloud provider. E.g. S3 is not recommended, as even S3 has gone down. That said, using a separate cloud provider is a micro-optimization for a very narrow set of failure scenarios. +We recommend deploying a static maintenance page. The industry best practice is to host the downtime page on a cloud provider that does not share infrastructure with your primary cloud provider. E.g. S3 is not recommended, as even S3 has gone down. That said, using a separate cloud provider is a micro-optimization for a very narrow set of failure scenarios. Some related considerations are how the maintenance page will be activated. @@ -50,5 +54,3 @@ Use ALB with [fixed response](https://docs.aws.amazon.com/elasticloadbalancing/l - Use `fixed-response` populated with HTML from `file` with inline CSS, SVGs, etc. 
and no external dependencies (if possible) - Add GA code for analytics - - diff --git a/docs/layers/software-delivery/design-decisions/decide-on-pipeline-strategy.mdx b/docs/layers/software-delivery/design-decisions/decide-on-pipeline-strategy.mdx index 6eba34592..6f50cfa9d 100644 --- a/docs/layers/software-delivery/design-decisions/decide-on-pipeline-strategy.mdx +++ b/docs/layers/software-delivery/design-decisions/decide-on-pipeline-strategy.mdx @@ -4,9 +4,12 @@ sidebar_label: "Pipeline Strategy" sidebar_position: 100 refarch_id: REFARCH-253 description: Decide what CI/CD pipelines are needed to deliver your software +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Context and Problem Statement @@ -39,13 +42,27 @@ Predefined workflows
Triggered on changes in a pull request that target the `main` branch. It will perform CI (build and test) and CD (deploy into _Preview_ and/or _QA_ environments) jobs.
Main branch workflow
-
Triggered on commit into the `main` branch to integrate the latest changes and create/update the next draft release. It will perform CI (build and test) and CD (deploy into `Dev` environment) jobs.
+
+ Triggered on commit into the `main` branch to integrate the latest changes and + create/update the next draft release. It will perform CI (build and test) and + CD (deploy into `Dev` environment) jobs. +
Release workflow
-
Triggered when a new release is published. The workflow will promote artifacts (docker image) that was built by the “_Feature branch workflow_“ to the release version and deploy them to the `Staging` and `Production` environments with approval gates. In addition, the workflow will create a special `release/{version}` branch that is required for the hotfixes workflow.
+
+ Triggered when a new release is published. The workflow will promote artifacts + (docker image) that were built by the “_Feature branch workflow_” to the + release version and deploy them to the `Staging` and `Production` environments + with approval gates. In addition, the workflow will create a special + `release/{version}` branch that is required for the hotfixes workflow. +
Hot Fix Branch workflow
-
Triggered on changes in a pull request that target any `release/{version}` branch. It will perform CI (build and test) and CD (deploy into `Hotfix` environment) jobs.
+
+ Triggered on changes in a pull request that target any `release/{version}` + branch. It will perform CI (build and test) and CD (deploy into `Hotfix` + environment) jobs. +
Hot Fix Release workflow
Triggered on commit into the `release/{version}` branch to integrate new hotfix changes. It will perform CI (build and test) and CD (deploy into the `Production` environment with approval gates) jobs. In addition, it will create a new release with incremented patch version and create a regular PR target `main` branch to integrate the hotfix with the latest code.
@@ -71,7 +88,7 @@ What we implement as part of our approach and the specific use cases we address ### CI testing based on the Feature branch workflow -- A developer creates a PR target to the `main` branch. GHA will perform build and run test on each commit. The developer should have ability to deploy/undeploy the changes to `Preview` and/or `QA` environment by adding/removing specific labels in PR Gihub UI. When PR merged or closed GHA should undeploy the code from `Preview`/`QA` environments where it is deployed to. +- A developer creates a PR target to the `main` branch. GHA will perform build and run test on each commit. The developer should have ability to deploy/undeploy the changes to `Preview` and/or `QA` environment by adding/removing specific labels in PR Gihub UI. When PR merged or closed GHA should undeploy the code from `Preview`/`QA` environments where it is deployed to. ### CI Preview Environments @@ -115,7 +132,7 @@ What we implement as part of our approach and the specific use cases we address ### Hotfix Pull Request workflow - In the case when there is a bug in the application that runs in the `Production` environment, the Developer needs to create a Hotfix PR. -- Hotfix PR should target to “_Release branch_” `release/{version}`. GHA should perform build and run tests on each commit. The developer should have ability to deploy/undeploy the changes to `Hotfix` environment by adding/removing specific labels in PR Gihub UI. When PR merged or closed GHA should undeploy the code from `Hotfix` environment. +- Hotfix PR should target to “_Release branch_” `release/{version}`. GHA should perform build and run tests on each commit. The developer should have ability to deploy/undeploy the changes to `Hotfix` environment by adding/removing specific labels in PR Gihub UI. When PR merged or closed GHA should undeploy the code from `Hotfix` environment. 
### Hotfix Environment @@ -125,7 +142,7 @@ What we implement as part of our approach and the specific use cases we address ### Hotfix Release workflow -- On each commit into a “_Release branch_” `release/{version}` “_Hotfix release workflow_” triggers. It will build and test the latest code from the branch, create a new release with increased patched version and deploy it with approval gate to the `Production` environment. +- On each commit into a “_Release branch_” `release/{version}` “_Hotfix release workflow_” triggers. It will build and test the latest code from the branch, create a new release with increased patched version and deploy it with approval gate to the `Production` environment. - Developer should also take care of the hotfix to the `main` branch, for which a reintegration PR will be created automatically. ### Deployments @@ -186,14 +203,28 @@ We recommend a monorepo for non-GitHub enterprise users. If we take this approac
Automated Rollbacks
-
Automated triggering of rollbacks is not supported. Manually initiated, automatic rollbacks are supported, but should be triggered by reverting the pull request and using the aforementioned release process.
+
+ Automated triggering of rollbacks is not supported. Manually initiated, + automatic rollbacks are supported, but should be triggered by reverting the + pull request and using the aforementioned release process. +
Provision environments
-
Provision k8s clusters, third party services for any environments should be performed as separate mile stone. We expect already have K8S credentials for deployments
+
+ Provisioning k8s clusters and third-party services for any environment should be + performed as a separate milestone. We expect to already have K8S credentials for + deployments. +
Define Docker based third party services
-
Third party services running in docker should be declared individually per application. This is Developers field of work.
+
+ Third-party services running in Docker should be declared individually per + application. This is the developers' field of work. +
Key Metrics & Observability
- Monitoring CI pipelines and tests for visibility (e.g. with with Datadog CI) is not factored in but can be added at a later time.

+ Monitoring CI pipelines and tests for visibility (e.g. with Datadog CI) + is not factored in but can be added at a later time. +
+
[https://www.datadoghq.com/blog/datadog-ci-visibility/](https://www.datadoghq.com/blog/datadog-ci-visibility/)
@@ -228,9 +259,11 @@ Links to any supporting documentation or pages, if any ## **Security Risk Assessment** -
+ +
-
+ +
The release engineering system consists of two main components - _Github Action Cloud_ (a.k. _GHA_) and _Github Action Runners_ (a.k. _GHA-Runners_). @@ -244,13 +277,13 @@ When a new `_Workflow Run_` is initialized, GHA issues a new unique _**Default t _**Default token**_ scoped to a repository (or another Github resource) that was the source of the triggered event. On the provided diagram, it is the _Application Repository._ -If a workflow needs to pull source code from another repository, we have to use _Personal Access Token (__**PAT**__),_ which had to be issued preliminarily. On the diagram, this is ‘**PAT PRIVATE GHA' (4)** that we use to pull the organization's private actions used as steps in GHA workflows. +If a workflow needs to pull source code from another repository, we have to use _Personal Access Token (****PAT****),_ which had to be issued preliminarily. On the diagram, this is ‘**PAT PRIVATE GHA' (4)** that we use to pull the organization's private actions used as steps in GHA workflows. In a moment GHA-Runner pulled the ‘_Application_’ source code and ‘_Private Actions_’ it is ready to perform real work - build docker images, run tests, deploy to specific environments and interact with Github for a better developer experience. -To interact with AWS services `_Workflow Run_` assumes **CICD (5)** **IAM role** that grants permissions to work with ECR and to assume **Helm (5)** **IAM roles** from another account. The **'Helm' IAM role** is useful to **Authenticate (6)** on a specific EKS cluster and to deploy there. Assuming **CICD IAM role** is possible only on '_Self-hosted GHA-_Runners’ as EC2 Instance credentials used for initial interaction with AWS. +To interact with AWS services `_Workflow Run_` assumes **CICD (5)** **IAM role** that grants permissions to work with ECR and to assume **Helm (5)** **IAM roles** from another account. The **'Helm' IAM role** is useful to **Authenticate (6)** on a specific EKS cluster and to deploy there. 
Assuming **CICD IAM role** is possible only on '\_Self-hosted GHA-\_Runners’ as EC2 Instance credentials used for initial interaction with AWS. -_**Default token**_ fits all needs except one - creating a _Hotfix Reintegration Pull Request._ for that functionally we need to implement a workaround. On the diagram provided one of the possible workarounds - using _**PAT to Create PRs (7)**_ with wider permissions_**.**_ +_**Default token**_ fits all needs except one - creating a _Hotfix Reintegration Pull Request._ for that functionally we need to implement a workaround. On the diagram provided one of the possible workarounds - using _**PAT to Create PRs (7)**_ with wider permissions***.*** ### Registration token @@ -311,15 +344,17 @@ It is impossible to use the _‘Default Github token’_ as it is scoped to one To get this PAT with minimal required permissions follows these steps: -1. Create a technical user on Github ( like `bot+private-gha@example.com` ) +1. Create a technical user on Github ( like `bot+private-gha@example.com` ) 2. Added the user to the `Private Actions` repository with 'read-only' permissions (`https://github.com/{organization}/{repository}/settings/access`) -3.
+3. +
-Generate a PAT for the technical user with that level of permissions [https://github.com/settings/tokens/new](https://github.com/settings/tokens/new) +Generate a PAT for the technical user with that level of permissions [https://github.com/settings/tokens/new](https://github.com/settings/tokens/new) -
+ +
4. Save the PAT as organization secret with name `GITHUB_PRIVATE_ACTIONS_PAT` (`https://github.com/organizations/{organization}/settings/secrets/actions`) diff --git a/docs/layers/software-delivery/design-decisions/decide-on-release-engineering-strategy.mdx b/docs/layers/software-delivery/design-decisions/decide-on-release-engineering-strategy.mdx index fa1ba9339..868a38cec 100644 --- a/docs/layers/software-delivery/design-decisions/decide-on-release-engineering-strategy.mdx +++ b/docs/layers/software-delivery/design-decisions/decide-on-release-engineering-strategy.mdx @@ -2,9 +2,12 @@ title: Decide on Release Engineering Strategy sidebar_label: Software Delivery description: Decide on how to release software changes to production +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Problem @@ -18,15 +21,16 @@ Gitflow is a branching strategy that allows for parallel development by creating The Gitflow branching model consists of the following branches: - - **Master Branch:** Represents the production-ready code and is typically only updated when a new release is made. - - **Develop Branch:** Represents the latest development code and serves as a parent branch for feature branches. - - **Feature Branches:** Created from the develop branch, feature branches are used to develop new features or functionality. Once the feature is complete, it is merged back into the develop branch. - - **Release Branches:** Created from the develop branch, release branches are used to prepare for a new production release. Any bug fixes and final testing are done on this branch before being merged back into both the develop and master branches. 
- - **Hotfix Branches:** Similar to release branches, hotfix branches are created from the master branch to address any critical bugs or issues discovered in the production code. Once the hotfix is complete, it is merged back into both the master and develop branches. +- **Master Branch:** Represents the production-ready code and is typically only updated when a new release is made. +- **Develop Branch:** Represents the latest development code and serves as a parent branch for feature branches. +- **Feature Branches:** Created from the develop branch, feature branches are used to develop new features or functionality. Once the feature is complete, it is merged back into the develop branch. +- **Release Branches:** Created from the develop branch, release branches are used to prepare for a new production release. Any bug fixes and final testing are done on this branch before being merged back into both the develop and master branches. +- **Hotfix Branches:** Similar to release branches, hotfix branches are created from the master branch to address any critical bugs or issues discovered in the production code. Once the hotfix is complete, it is merged back into both the master and develop branches. The benefit of Gitflow is that it provides a clear path for changes to be made to the codebase, ensuring that production-ready code is only released from the master branch. It also allows for multiple developers to work on features and bug fixes in parallel without disrupting the development workflow. -
+ +
### Trunk-Based Strategy @@ -34,7 +38,8 @@ Trunk-based development is a branching strategy that allows for continuous integ The trunk-based branching model consists of the following branches: - - **Main (_Trunk_) Branch:** Represents the latest development code and serves as a parent branch for feature branches. - - **Feature Branches:** Created from the _Trunk_ branch, feature branches are used to develop new features or functionality. Once the feature is complete, it is merged back into the develop branch. +- **Main (_Trunk_) Branch:** Represents the latest development code and serves as a parent branch for feature branches. +- **Feature Branches:** Created from the _Trunk_ branch, feature branches are used to develop new features or functionality. Once the feature is complete, it is merged back into the _Trunk_ branch. -
+ +
diff --git a/docs/layers/software-delivery/design-decisions/decide-on-release-promotion-strategy.mdx b/docs/layers/software-delivery/design-decisions/decide-on-release-promotion-strategy.mdx index 60aceb820..cb8de8197 100644 --- a/docs/layers/software-delivery/design-decisions/decide-on-release-promotion-strategy.mdx +++ b/docs/layers/software-delivery/design-decisions/decide-on-release-promotion-strategy.mdx @@ -4,9 +4,12 @@ sidebar_label: "Release Promotion Strategy" sidebar_position: 100 refarch_id: REFARCH-419 description: Decide how releases are promoted from dev to production +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ### Problem and Context @@ -25,21 +28,25 @@ How that will be accomplished will depend on whether or not GitHub Enterprise fe #### Option A: Automatically Deploy to Staging on Every Release, Use GitHub Approval Steps for Production ##### Pros + - Natively supported by GitHub - Environment protection rules ensure RBAC restricts who can approve deployments ##### Cons + - Requires GitHub Enterprise, as GitHub Approvals, GitHub Environment protection rules (and Environment Secrets) are only available in GitHub Enterprise. #### Option B: Automatically Deploy to Staging on Every Release, Use Manual GitHub Action Workflow to Production Deployments ##### Pros + - Does not require GitHub Enterprise - Staging always represents the latest release ##### Cons + - No environment protection rules; anyone who can run the workflow can deploy. Mitigated by customizing the workflow with business logic to restrict it, but not supported by Cloud Posse today. 
#### Option C: Use Manual GitHub Action Workflow for Staging and Production Deployments @@ -63,6 +70,3 @@ How that will be accomplished will depend on whether or not GitHub Enterprise fe ### Related Design Decisions - [Decide on Database Schema Migration Strategy](/layers/data/design-decisions/decide-on-database-schema-migration-strategy) - - - diff --git a/docs/layers/software-delivery/design-decisions/decide-on-repositories-strategy.mdx b/docs/layers/software-delivery/design-decisions/decide-on-repositories-strategy.mdx index 05d4d889e..835ff9b35 100644 --- a/docs/layers/software-delivery/design-decisions/decide-on-repositories-strategy.mdx +++ b/docs/layers/software-delivery/design-decisions/decide-on-repositories-strategy.mdx @@ -3,21 +3,26 @@ title: "Decide on Repositories Strategy" sidebar_label: "Repositories Strategy" sidebar_position: 100 description: "Decide whether to use monorepos or polyrepos" +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Context and Problem Statement ## Problem Statement + Deciding repository strategies for your codebase is a crucial choice because it can significantly impact your development processes, collaboration effectiveness, tooling, and architectural decisions. There are two main strategies for organizing source code repositories: [monorepo](https://en.wikipedia.org/wiki/Monorepo) and `polyrepo`. In a middle there can be a hybrid strategies: -* `multi monorepos` -* `monorepo as read-only proxy` -* `polyrepos & monorepos` + +- `multi monorepos` +- `monorepo as read-only proxy` +- `polyrepos & monorepos` The hybrid strategies inherit gains and loss of the main two. That's why focus on pros and const for the main repository structures. 
@@ -47,11 +52,12 @@ CI/CD pipelines contains less logic and works faster because it only has to proc Code that's shared between projects might have to be duplicated in each repository. #### Increased Management Overhead + Managing multiple repositories can be more complex and time-consuming. #### Complex Dependency Management -If libraries have interdependencies, it can be harder to manage versioning across multiple repositories. +If libraries have interdependencies, it can be harder to manage versioning across multiple repositories. ## Monorepo @@ -66,35 +72,41 @@ With all the code in one place, it's easy to share and reuse code across multipl This can lead to more consistent code, reduce duplication, and enhance productivity. #### Unified Versioning + A monorepo has a single source of truth for the current state of the system. #### Collaboration and Code Review + Developers can work together on code, have visibility of changes across the entire project, and perform code reviews more effectively. #### Simplified Dependency Management + All projects use the same version of third-party dependencies, which can make managing those dependencies easier. ### Challenges #### Scalability + As a codebase grows, it can become more challenging to manage and navigate a monorepo. #### Complex and slower CI/CD Pipelines + Continuous integration and deployment can become slower as your codebase grows because the pipeline may need to compile and test the entire codebase for every change. CI/CD pipelines for monorepo are complex and required special tooling such as [Bazel](https://bazel.build/), [Pants](https://www.pantsbuild.org/), [Please](https://please.build/) or [Buck](https://buck2.build/). #### Risk of Breaking Changes + A small change in one part of the codebase might break something else unexpectedly since everything is interconnected. 
#### Dummy Versioning + Whenever the entire monorepo is tagged, it automatically assigns this new tag to all code inside, including all hosted libraries. This could lead to the release of all these libraries under the new version number, even if many of these libraries have not been updated or modified in any way. - ## Recommendation We recommend using `Polyrepo` as a basic repository organization strategy because it leads to faster development cycle, @@ -110,9 +122,10 @@ use different patterns, but that lead to the complexity of the CI/CD pipelines a That's why we recommend having a consistent repository strategy, at least on a team level. ## References -* [Monorepo vs. polyrepo](https://github.com/joelparkerhenderson/monorepo-vs-polyrepo) -* [From a Single Repo, to Multi-Repos, to Monorepo, to Multi-Monorepo](https://css-tricks.com/from-a-single-repo-to-multi-repos-to-monorepo-to-multi-monorepo/) -* [Monorepo vs Polyrepo](https://earthly.dev/blog/monorepo-vs-polyrepo/) -* [Polyrepo vs. Monorepo - How does it impact dependency management?](https://www.endorlabs.com/blog/polyrepo-vs-monorepo-how-does-it-impact-dependency-management) -* [Monorepo Vs Polyrepo Architecture: A Comparison For Effective Software Development](https://webo.digital/blog/monorepo-vs-polyrepo-architecture/) -* + +- [Monorepo vs. polyrepo](https://github.com/joelparkerhenderson/monorepo-vs-polyrepo) +- [From a Single Repo, to Multi-Repos, to Monorepo, to Multi-Monorepo](https://css-tricks.com/from-a-single-repo-to-multi-repos-to-monorepo-to-multi-monorepo/) +- [Monorepo vs Polyrepo](https://earthly.dev/blog/monorepo-vs-polyrepo/) +- [Polyrepo vs. 
Monorepo - How does it impact dependency management?](https://www.endorlabs.com/blog/polyrepo-vs-monorepo-how-does-it-impact-dependency-management) +- [Monorepo Vs Polyrepo Architecture: A Comparison For Effective Software Development](https://webo.digital/blog/monorepo-vs-polyrepo-architecture/) +- diff --git a/docs/layers/software-delivery/design-decisions/decide-on-seeding-strategy-for-staging-environments.mdx b/docs/layers/software-delivery/design-decisions/decide-on-seeding-strategy-for-staging-environments.mdx index 9343ac3b5..8236219eb 100644 --- a/docs/layers/software-delivery/design-decisions/decide-on-seeding-strategy-for-staging-environments.mdx +++ b/docs/layers/software-delivery/design-decisions/decide-on-seeding-strategy-for-staging-environments.mdx @@ -4,14 +4,19 @@ sidebar_label: "Seeding Strategy for Staging" sidebar_position: 100 refarch_id: REFARCH-423 description: Decide what data belongs in Staging +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Problem + Longer-lived staging environments need a dataset that closely resembles production. If this dataset becomes stale, we’ll not be effectively testing releases before they hit production. Restoring snapshots from production is not recommended. ## Considerations + - Should contain anonymized users, invalid email addresses - No CHD, PHI, PII must be contained in the database @@ -32,6 +37,5 @@ Longer-lived staging environments need a dataset that closely resembles producti Cloud Posse does not have a turnkey solution for seeding staging environments ::: -- ETL pipeline scrubs the data and refreshes the database weekly or monthly. (e.g. AWS Glue, GitHub Action Schedule Job) - +- ETL pipeline scrubs the data and refreshes the database weekly or monthly. (e.g. 
AWS Glue, a scheduled GitHub Actions job) diff --git a/docs/layers/software-delivery/design-decisions/decide-on-self-hosted-github-runner-strategy.mdx b/docs/layers/software-delivery/design-decisions/decide-on-self-hosted-github-runner-strategy.mdx index 11f3b8722..06b1e39dc 100644 --- a/docs/layers/software-delivery/design-decisions/decide-on-self-hosted-github-runner-strategy.mdx +++ b/docs/layers/software-delivery/design-decisions/decide-on-self-hosted-github-runner-strategy.mdx @@ -4,9 +4,12 @@ sidebar_label: "GitHub Runners" sidebar_position: 100 refarch_id: REFARCH-394 description: Decide on how to self-host GitHub Runners +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Problem @@ -39,7 +42,7 @@ This is our recommended approach ::: -Deploying these Runners on Kubernetes is possible using [actions-runner-controller](https://github.com/actions-runner-controller/actions-runner-controller). With this controller, a small-to-medium-sized cluster can house a large number of Runners (depending on their requested Memory and CPU resources), and these Runners can scale automatically using the controller’s `HorizontalRunnerAutoscaler` CRD. This has the benefit that it can scale to zero and leverages all the monitoring we have on the platform. This solution also allows for using a custom runner image without having to rebuild an AMI or modify a user-data script and re-launch instances, which would be necessary when deploying the Runners to EC2. +Deploying these Runners on Kubernetes is possible using [actions-runner-controller](https://github.com/actions-runner-controller/actions-runner-controller).
With this controller, a small-to-medium-sized cluster can house a large number of Runners (depending on their requested Memory and CPU resources), and these Runners can scale automatically using the controller’s `HorizontalRunnerAutoscaler` CRD. This has the benefit that it can scale to zero and leverages all the monitoring we have on the platform. This solution also allows for using a custom runner image without having to rebuild an AMI or modify a user-data script and re-launch instances, which would be necessary when deploying the Runners to EC2. `actions-runner-controller` also supports several various mechanisms for scaling the number of Runners: `PercentageRunnersBusy` simply scales the Runners up or down based on how many of them are currently busy, without having to maintain a list of repositories used by the Runners, which would be the case in `TotalNumberOfQueuedAndInProgressWorkflowRuns`. The most efficient and recommended option for horizontal auto-scaling using the `actions-runner-controller`, however, is to [enable the controller’s webhook server](https://github.com/actions-runner-controller/actions-runner-controller#webhook-driven-scaling) and configure the `HorizontalRunnerAutoscaler` to scale on GitHub webhook events (for event name: `check_run`, type: `created`, status: `queued`). Note that the `actions-runner-controller` does not have any logic to automatically create the webhook configuration in GitHub, and hence, the webhook server needs to be exposed and configured manually in GitHub or using the GitHub API. If using `aws-load-balancer-controller`, ensure that within `actions-runner-controller` Helm chart, `githubWebhookServer.ingress.enabled` is set to `true`, and if using `external-dns`, set `githubWebhookServer.ingress.annotations` to include an `external-dns.alpha.kubernetes.io/alias`. Finally, configure the webhook in GitHub to match the hostname and port of the endpoint corresponding to the newly-created Ingress object. 
@@ -79,5 +82,3 @@ The GitHub Actions Runners often need to perform continuous integration tasks su - [Decide on Self-Hosted GitHub Runner Strategy](/layers/software-delivery/design-decisions/decide-on-self-hosted-github-runner-strategy) - [Decide on Strategy for Continuous Integration](/layers/software-delivery/design-decisions/decide-on-strategy-for-continuous-integration) - [Decide on GitHub Actions Workflow Organization Strategy](/layers/software-delivery/design-decisions/decide-on-github-actions-workflow-organization-strategy) - - diff --git a/docs/layers/software-delivery/design-decisions/decide-on-strategy-for-continuous-integration.mdx b/docs/layers/software-delivery/design-decisions/decide-on-strategy-for-continuous-integration.mdx index a13757d6e..7cbd63aef 100644 --- a/docs/layers/software-delivery/design-decisions/decide-on-strategy-for-continuous-integration.mdx +++ b/docs/layers/software-delivery/design-decisions/decide-on-strategy-for-continuous-integration.mdx @@ -4,9 +4,12 @@ sidebar_label: "Continuous Integration" sidebar_position: 100 refarch_id: REFARCH-426 description: Decide on what happens during the CI process +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; # Decide on Strategy for Continuous Integration @@ -45,5 +48,3 @@ Deploy a preview environment and then test it. 
Note: not all services are suitab - Superlinter ## Options for Security Tests - - diff --git a/docs/layers/software-delivery/design-decisions/decide-on-strategy-for-developer-environments.mdx b/docs/layers/software-delivery/design-decisions/decide-on-strategy-for-developer-environments.mdx index d0dae07a1..6857fe1f5 100644 --- a/docs/layers/software-delivery/design-decisions/decide-on-strategy-for-developer-environments.mdx +++ b/docs/layers/software-delivery/design-decisions/decide-on-strategy-for-developer-environments.mdx @@ -4,9 +4,12 @@ sidebar_label: "Developer Environments" sidebar_position: 100 refarch_id: REFARCH-424 description: Decide on how developers will work with the application locally +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Versioning Considerations @@ -35,5 +38,3 @@ import KeyPoints from '@site/src/components/KeyPoints'; 2. Skaffold 3. 
Docker Compose - - diff --git a/docs/layers/software-delivery/design-decisions/decide-on-strategy-for-managing-and-orchestrating-secrets.mdx b/docs/layers/software-delivery/design-decisions/decide-on-strategy-for-managing-and-orchestrating-secrets.mdx index 331f95333..94b7c6c86 100644 --- a/docs/layers/software-delivery/design-decisions/decide-on-strategy-for-managing-and-orchestrating-secrets.mdx +++ b/docs/layers/software-delivery/design-decisions/decide-on-strategy-for-managing-and-orchestrating-secrets.mdx @@ -3,9 +3,12 @@ title: "Decide on Strategy for Managing and Orchestrating Secrets" sidebar_label: "Secrets Management" refarch_id: REFARCH-538 description: How to manage and orchestrate secrets for your applications +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Context and Problem Statement diff --git a/docs/layers/software-delivery/design-decisions/decide-on-strategy-for-preview-environments-e-g-review-apps.mdx b/docs/layers/software-delivery/design-decisions/decide-on-strategy-for-preview-environments-e-g-review-apps.mdx index 8b0ea19e8..a2303f6ce 100644 --- a/docs/layers/software-delivery/design-decisions/decide-on-strategy-for-preview-environments-e-g-review-apps.mdx +++ b/docs/layers/software-delivery/design-decisions/decide-on-strategy-for-preview-environments-e-g-review-apps.mdx @@ -4,9 +4,12 @@ sidebar_label: "Preview Environments" sidebar_position: 100 refarch_id: REFARCH-425 description: Decide how preview environments will work +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Considerations @@ -76,7 +79,7 @@ Slight improvement over using `helmfile` with 
the `raw` chart. - Requires granted direct Kubernetes administrative access GitHub Action runners in order to deploy helm charts - GitHub Action runners will need direct access to read any secrets needed to deploy the helm releases. -(mitigation is to use something like `sops-operator` or `external-secrets` operator) + (mitigation is to use something like `sops-operator` or `external-secrets` operator) ### Use GitHub actions with ArgoCD and helm @@ -98,7 +101,8 @@ For some additional context on ArgoCD [Decide on ArgoCD Architecture](/layers/so - URLs will be posted to GitHub Status API to that environments are directly reachable from PRs -
+ +
- Do we need to secure these environments? We recommend just locking down the ALB to internal traffic and using VPN @@ -130,13 +134,13 @@ This is more of a side note that not all microservices organizations are the sam 4. As a result of seeing performance needs to scale -1. If this is the case, we _technically_ don’t need to do microservices; we just need to be able to control the entry point & routing (e.g. a “Microservices Monolith”) +5. If this is the case, we _technically_ don’t need to do microservices; we just need to be able to control the entry point & routing (e.g. a “Microservices Monolith”) -2. For this to work, the monolith needs to be able to communicate with itself as a service (e.g. gRPC) for local development. We see this with Go microservices; this can be done when it’s necessary as a pattern to scale endpoints +6. For this to work, the monolith needs to be able to communicate with itself as a service (e.g. gRPC) for local development. We see this with Go microservices; this can be done when it’s necessary as a pattern to scale endpoints -3. Preview environments can still use the gRPC but over localhost +7. Preview environments can still use the gRPC but over localhost -5. As a result of wanting to experiment with multiple versions of the same service (E.g. using a service mesh) +8. As a result of wanting to experiment with multiple versions of the same service (E.g. 
using a service mesh) ## Related Design Decisions @@ -146,4 +150,3 @@ This is more of a side note that not all microservices organizations are the sam Internal preview environments cannot accept webhook callbacks from external services like twilio ::: - diff --git a/docs/layers/software-delivery/design-decisions/decide-on-terraform-configuration-pattern-for-application-reposi.mdx b/docs/layers/software-delivery/design-decisions/decide-on-terraform-configuration-pattern-for-application-reposi.mdx index e41986a2d..23872f9a3 100644 --- a/docs/layers/software-delivery/design-decisions/decide-on-terraform-configuration-pattern-for-application-reposi.mdx +++ b/docs/layers/software-delivery/design-decisions/decide-on-terraform-configuration-pattern-for-application-reposi.mdx @@ -4,9 +4,12 @@ sidebar_label: "Terraform Configuration Pattern" sidebar_position: 100 refarch_id: REFARCH-514 description: Decide how to configure applications +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Context and Problem Statement @@ -43,5 +46,3 @@ These Terraform resources are not limited to the AWS provider. 
Other valid types ## References - [How to Manage Terraform Dependencies in Micro-service Repositories](/learn/maintenance/tutorials/how-to-manage-terraform-dependencies-in-micro-service-repositori) - - diff --git a/docs/layers/software-delivery/design-decisions/design-decisions.mdx b/docs/layers/software-delivery/design-decisions/design-decisions.mdx index 4a352b18a..6ae707857 100644 --- a/docs/layers/software-delivery/design-decisions/design-decisions.mdx +++ b/docs/layers/software-delivery/design-decisions/design-decisions.mdx @@ -2,12 +2,16 @@ title: Design Decisions sidebar_label: Review Design Decisions sidebar_position: 1 +tags: + - design-decision --- -import DocCardList from '@theme/DocCardList'; -import Intro from '@site/src/components/Intro'; + +import DocCardList from "@theme/DocCardList"; +import Intro from "@site/src/components/Intro"; -Review the key design decisions for how you'll implement CI/CD for your applications. + Review the key design decisions for how you'll implement CI/CD for your + applications. 
- + diff --git a/docs/layers/spacelift/design-decisions/decide-on-how-spacelift-will-use-external-private-modules.mdx b/docs/layers/spacelift/design-decisions/decide-on-how-spacelift-will-use-external-private-modules.mdx index dddbc1a9e..b7bd247db 100644 --- a/docs/layers/spacelift/design-decisions/decide-on-how-spacelift-will-use-external-private-modules.mdx +++ b/docs/layers/spacelift/design-decisions/decide-on-how-spacelift-will-use-external-private-modules.mdx @@ -4,12 +4,19 @@ sidebar_label: "Private Modules" sidebar_position: 100 refarch_id: REFARCH-535 description: How to securely access to private Terraform modules in Spacelift +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; -Sometimes there will be a need to host private Terraform modules that may exist outside the infrastructure monorepo. When this is the case, Spacelift needs access to these module registries or repositories. There are a few ways to go about it, with various tradeoffs to ensure secure and efficient module management while minimizing complexity and risk. + Sometimes there will be a need to host private Terraform modules that may + exist outside the infrastructure monorepo. When this is the case, Spacelift + needs access to these module registries or repositories. There are a few ways + to go about it, with various tradeoffs to ensure secure and efficient module + management while minimizing complexity and risk. **Date**: **19 Oct 2021** @@ -109,5 +116,3 @@ Cloud Posse recommends avoiding this approach. 
- [https://www.terraform.io/language/state/remote-state-data](https://www.terraform.io/language/state/remote-state-data) - [https://maelvls.dev/gh-actions-with-tf-private-repo/](https://maelvls.dev/gh-actions-with-tf-private-repo/) - - diff --git a/docs/layers/spacelift/design-decisions/decide-on-spacelift-administrative-stack-auto-deployment.mdx b/docs/layers/spacelift/design-decisions/decide-on-spacelift-administrative-stack-auto-deployment.mdx index 917e55c7a..56ebd3efb 100644 --- a/docs/layers/spacelift/design-decisions/decide-on-spacelift-administrative-stack-auto-deployment.mdx +++ b/docs/layers/spacelift/design-decisions/decide-on-spacelift-administrative-stack-auto-deployment.mdx @@ -4,12 +4,19 @@ sidebar_label: "Stack Auto-deployment" sidebar_position: 100 refarch_id: REFARCH-410 description: Balance auto-deployment with manual confirmation +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; - Spacelift administrative stacks support auto-deployment to streamline the process of provisioning new component stacks, but the ease of use and automation must be balanced against the risk of accidental deletions due to misconfiguration. Reviewing these considerations will help decide the best approach for your organization’s needs. + Spacelift administrative stacks support auto-deployment to streamline the + process of provisioning new component stacks, but the ease of use and + automation must be balanced against the risk of accidental deletions due to + misconfiguration. Reviewing these considerations will help decide the best + approach for your organization’s needs. 
## Problem @@ -28,7 +35,8 @@ This admin stack is manually created in Spacelift (it’s how it knows about you When making changes or adding a new component to a stack configuration in Spacelift, those changes must be applied first by the administrative stack before Spacelift can do anything with it (e.g. show a terraform plan of this stack). -
+ +
## Considered Options @@ -65,5 +73,3 @@ If `autodeploy=false` on the admin stack, then new stacks will have to be manual - Users are frequently confused why their stack does not show any plans or why nothing is terraformed. - Users forget to confirm the changes to the Spacelift configuration because this implementation detail is not obvious to them. - - diff --git a/docs/layers/spacelift/design-decisions/decide-on-spacelift-worker-pool-architecture.mdx b/docs/layers/spacelift/design-decisions/decide-on-spacelift-worker-pool-architecture.mdx index 9ded7b207..4444c6c6a 100644 --- a/docs/layers/spacelift/design-decisions/decide-on-spacelift-worker-pool-architecture.mdx +++ b/docs/layers/spacelift/design-decisions/decide-on-spacelift-worker-pool-architecture.mdx @@ -5,12 +5,19 @@ sidebar_position: 100 sidebar_class_name: compact refarch_id: REFARCH-475 description: Scope Spacelift Workers for secure automation +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; - Spacelift Workers are deployed in VPCs with scoped IAM permissions to ensure robust automation capabilities while minimizing security risks. By default, we avoid automating certain core system accounts, but this can be adjusted as needed. Review these considerations to determine the best approach for your organization. + Spacelift Workers are deployed in VPCs with scoped IAM permissions to ensure + robust automation capabilities while minimizing security risks. By default, we + avoid automating certain core system accounts, but this can be adjusted as + needed. Review these considerations to determine the best approach for your + organization. 
## Problem @@ -83,5 +90,3 @@ We typically deploy this configuration, but are leaning towards Option 1 instead - [How to Sign Up for Spacelift](/layers/spacelift) - [spacelift-worker-pool](//components/library/aws/spacelift/worker-pool/) (component) - - diff --git a/docs/layers/spacelift/design-decisions/design-decisions.mdx b/docs/layers/spacelift/design-decisions/design-decisions.mdx index 84b3936d3..1cc37da9c 100644 --- a/docs/layers/spacelift/design-decisions/design-decisions.mdx +++ b/docs/layers/spacelift/design-decisions/design-decisions.mdx @@ -2,12 +2,16 @@ title: Design Decisions sidebar_label: Review Decisions sidebar_position: 1 +tags: + - design-decision --- -import DocCardList from '@theme/DocCardList'; -import Intro from '@site/src/components/Intro'; + +import DocCardList from "@theme/DocCardList"; +import Intro from "@site/src/components/Intro"; -Review the key design decisions for how you'll leverage Spacelift for continuous delivery of your infrastructure with Terraform and Atmos. + Review the key design decisions for how you'll leverage Spacelift for + continuous delivery of your infrastructure with Terraform and Atmos. 
- + diff --git a/docs/quickstart/kickoff.mdx b/docs/quickstart/kickoff.mdx index 4715b84b8..7200fd8fa 100644 --- a/docs/quickstart/kickoff.mdx +++ b/docs/quickstart/kickoff.mdx @@ -4,37 +4,46 @@ sidebar_label: "Kick Off" sidebar_position: 2 describe: Set project expectations for Jumpstarts with Cloud Posse --- -import Link from '@docusaurus/Link' -import KeyPoints from '@site/src/components/KeyPoints' -import Steps from '@site/src/components/Steps' -import Step from '@site/src/components/Step' -import StepNumber from '@site/src/components/StepNumber' -import Intro from '@site/src/components/Intro' -import ActionCard from '@site/src/components/ActionCard' -import PrimaryCTA from '@site/src/components/PrimaryCTA' -import TaskList from '@site/src/components/TaskList' -import Admonition from '@theme/Admonition'; -import Tabs from '@theme/Tabs'; -import TabItem from '@theme/TabItem'; + +import Link from "@docusaurus/Link"; +import KeyPoints from "@site/src/components/KeyPoints"; +import Steps from "@site/src/components/Steps"; +import Step from "@site/src/components/Step"; +import StepNumber from "@site/src/components/StepNumber"; +import Intro from "@site/src/components/Intro"; +import ActionCard from "@site/src/components/ActionCard"; +import PrimaryCTA from "@site/src/components/PrimaryCTA"; +import TaskList from "@site/src/components/TaskList"; +import Admonition from "@theme/Admonition"; +import Tabs from "@theme/Tabs"; +import TabItem from "@theme/TabItem"; - The kickoff call for [Quickstarts](/intro/path) ensures you get a smooth start with clear expectations. During the call, we will confirm your design decisions. After the call, we'll make sure everyone on your team has access to our weekly support workshops, and set you on your way with the action items. You'll quickly receive all the configurations customized to your requirements in about a day. + The kickoff call for [Quickstarts](/intro/path) ensures you get a smooth start + with clear expectations. 
During the call, we will confirm your design + decisions. After the call, we'll make sure everyone on your team has access to + our weekly support workshops, and set you on your way with the action items. + You'll quickly receive all the configurations customized to your requirements + in about a day. - - **Review Design Decisions** Confirm your requirements answer any questions you may have - - **Cover Next Steps** Review what to expect after the call, and what you need to get started - - **Support** Introduce the available support options, including GitHub Discussions, Customer Workshops, and Office Hours + - **Review Design Decisions** Confirm your requirements and answer any questions you may have + - **Cover Next Steps** Review what to expect after the call, and what you need to get started + - **Support** Introduce the available support options, including GitHub Discussions, + Customer Workshops, and Office Hours ## Preparing for the Kickoff Meeting This document outlines what to expect from your first call with Cloud Posse. In order to make the most of this meeting, please read through this document and come prepared with questions. In particular, please review the following: + - 1. Decide who will lead the project in your organization - 2. Ensure everyone who needs to be on the call is added to the invitation - 2. Read through the [Design Decisions](#review-design-decisions) and prepare questions and decisions - 3. Review the list of [Actions Items](#action-items) following this call + 1. Decide who will lead the project in your organization + 2. Ensure everyone who needs to be on the call is added to the invitation + 3. Read through the [Design Decisions](#review-design-decisions) and prepare + questions and decisions + 4. Review the list of [Action Items](#action-items) following this call ## Kickoff Meeting Agenda @@ -44,6 +53,7 @@ This document outlines what to expect from your first call with Cloud Posse.
In ### Introductions Here we will review who is on the call, what their roles are, and identify our technical point of contact at Cloud Posse. We will also review the working timezones of the teams. + @@ -83,6 +93,7 @@ This document outlines what to expect from your first call with Cloud Posse. In This is a great opportunity to get your questions answered and to get help with your project. + @@ -95,12 +106,14 @@ This document outlines what to expect from your first call with Cloud Posse. In This is a good way to keep up with the latest developments and trends in the DevOps community. Sign up at [cloudposse.com/office-hours](https://cloudposse.com/office-hours/) + ### Join our SweetOps Slack Community If you are looking for a community of like-minded DevOps practitioners, we invite you to join our [SweetOps Slack](https://slack.sweetops.com/). + @@ -109,6 +122,7 @@ This document outlines what to expect from your first call with Cloud Posse. In Review the foundational Design Decisions. + - [ ] [Decide on Terraform Version](/layers/project/design-decisions/decide-on-terraform-version) - [ ] [Decide on Namespace Abbreviation](/layers/project/design-decisions/decide-on-namespace-abbreviation) - [ ] [Decide on Infrastructure Repository Name](/layers/project/design-decisions/decide-on-infrastructure-repository-name) - [ ] [Decide on Email Address Format for AWS Accounts](/layers/accounts/design-decisions/decide-on-email-address-format-for-aws-accounts) @@ -119,7 +133,12 @@ This document outlines what to expect from your first call with Cloud Posse. 
In - [ ] [Decide on Service Discovery Domain](/layers/network/design-decisions/decide-on-service-discovery-domain) - [ ] [Decide on Vanity Domain](/layers/network/design-decisions/decide-on-vanity-branded-domain) - [ ] [Decide on Release Engineering Strategy](/layers/software-delivery/design-decisions/decide-on-release-engineering-strategy) + + + These are the design decisions you can customize as part of the Quickstart package. All other decisions are pre-made for you, but you’re welcome to review them. If you’d like to make additional changes, [let us know—we’re happy to provide a quote](https://cloudposse.com/meet). + + @@ -132,52 +151,62 @@ Cloud Posse has noticed several patterns that lead to successful projects. ### Come to Customer Workshops Prepared Review six pagers and documentation before workshops. This will help you to know what questions to ask. Coming unprepared will lead to a lot of questions and back-and-forth. This will slow down material resulting in less time for new material. + ### Take Initiative The most successful customers take initiative to make customizations to their Reference Architecture. This is a great way to make the Reference Architecture your own. It also helps to build a deeper understanding of the Reference Architecture and how it works. + ### Cameras On We recommend that all participants have their cameras on during our Customer Workshops. This helps to build trust and rapport. It also helps to keep everyone engaged and focused. This also lets us gage how everyone is understanding the material. If you are having trouble understanding something, please ask questions. + ### Ask Questions We encourage you to ask questions. We want to make sure that everyone understands the material. We also want to make sure that we are providing the right level of detail. Our meetings are intended to be interactive and encourage conversation. Please feel free to interject at any time if you have a question or a comment to add to the discussion. 
+ ### Participate in our Slack Community We encourage you to participate in our public Slack channels. This is a great way to get help and to learn from others. We have a lot of customers who have been through the same process and can provide valuable insights. We also have a lot of Cloud Posse engineers who are available to help answer questions. + ### Attend Weekly Office Hours Both the [Shared Customer Workshops](#shared-customer-workshop) and [Community Office Hours](#community-office-hours) are great opportunities to ask questions and get help. + ### Read our Documentation You can always find how-to guides, design decisions, and other helpful pages at [docs.cloudposse.com](/) + ### Take the Next Step Don't wait! Keep the momentum going by taking the next step. If you have questions, ask them. If you need help, ask for it. We are here to help you succeed. + - After our kickoff call, there are several action items for you to consider based on your goals. Not every item may be relevant, but please review them and take action on the ones that apply to you. + After our kickoff call, there are several action items for you to consider + based on your goals. Not every item may be relevant, but please review them + and take action on the ones that apply to you. 
Next Step diff --git a/docs/resources/legacy/design-decisions/decide-on-api-gateway-requirements.mdx b/docs/resources/legacy/design-decisions/decide-on-api-gateway-requirements.mdx index 00f6bf6e9..1b82a4413 100644 --- a/docs/resources/legacy/design-decisions/decide-on-api-gateway-requirements.mdx +++ b/docs/resources/legacy/design-decisions/decide-on-api-gateway-requirements.mdx @@ -3,9 +3,12 @@ title: "Decide on API Gateway Requirements" sidebar_label: "API Gateway" sidebar_position: 100 refarch_id: REFARCH-540 +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Context and Problem Statement @@ -14,6 +17,7 @@ import KeyPoints from '@site/src/components/KeyPoints'; Amazon API Gateway is an AWS service designed to simplify publishing highly-scalable REST, HTTP, and WebSocket APIs. These API Gateways act as a central point of access at the edge and can access backend services running on EC2, EKS, ECS, Lambda, and AWS Services directly, such as DynamoDB, S3, or SQS. The API Gateway has several benefits over a conventional ALB in that it’s optimized for APIs: namely, it can authenticate requests, cache, rate-limiting, feature flagging, a/b testing, rewrite requests/responses, aggregate requests, etc. It’s arguably a simpler alternative to using something like a Service Mesh. ## Common Scenarios + There are several common use cases for API Gateway, some of the most common of which are detailed below. ### REST API @@ -27,13 +31,13 @@ Choose REST APIs if you need features such as API keys, per-client throttling, r At its core, an API Gateway REST API comprises resources (e.g. customers) and methods. A resource is a logical entity that can be accessed through a resource path. 
A method corresponds to a REST API request submitted by the user of your API and the response returned to the user. > [RESTful APIs have strong controls](https://aws.amazon.com/blogs/compute/from-poll-to-push-transform-apis-using-amazon-api-gateway-rest-apis-and-websockets/#:~:text=RESTful%20APIs%20have%20strong%20controls%20to%20ensure%20that%20user%20requests%20for%20data%20are%20validated%20and%20given%20guardrails%20for%20the%20intended%20query%20purpose.%20This%20helps%20prevent%20rogue%20requests%20and%20unforeseen%20impacts%20to%20the%20analytics%20environment%2C%20especially%20when%20exposed%20to%20a%20large%20number%20of%20users.) to ensure that user requests for data are validated and given guardrails for the intended query purpose. This helps prevent rogue requests and unforeseen impacts to the analytics environment, especially when exposed to a large number of users. -For example, `/customers` could be the path of a resource representing the business’s customers. A resource supports one or more operations defined by standard HTTP verbs such as GET, POST, PUT, PATCH, and DELETE. A combination of a resource path and an operation identifies a method of the API. For example, a `POST /customers` method could add a new customer, and a `GET /customers` method could return a list of all of the customers. +> For example, `/customers` could be the path of a resource representing the business’s customers. A resource supports one or more operations defined by standard HTTP verbs such as GET, POST, PUT, PATCH, and DELETE. A combination of a resource path and an operation identifies a method of the API. For example, a `POST /customers` method could add a new customer, and a `GET /customers` method could return a list of all of the customers. The API caller doesn't need to know where the requested data is stored and fetched from on the backend. In API Gateway REST APIs, the front-end interface is encapsulated by _method requests_ and _method responses_. 
The API interfaces with the backend by means of _integration requests_ and _integration responses_. For example, with DynamoDB as the backend, the API developer sets up the integration request to forward the incoming method request to the chosen backend (DynamoDB). The setup includes specifications of a DynamoDB action, required IAM role and policies, and required input data transformation. The backend returns the result to API Gateway as an integration response. -To return the integration response to the client (method response), you can configure the integration response to map response parameters from integration to method. You can also translate the output data format of the backend to that of the front end (e.g. map specific DynamoDB columns to the JSON response), if necessary. API Gateway enables you to define a schema or model for the [payload](https://en.wikipedia.org/wiki/Payload_(computing)) to facilitate setting up the body mapping template. +To return the integration response to the client (method response), you can configure the integration response to map response parameters from integration to method. You can also translate the output data format of the backend to that of the front end (e.g. map specific DynamoDB columns to the JSON response), if necessary. API Gateway enables you to define a schema or model for the [payload](https://en.wikipedia.org/wiki/Payload_%28computing%29) to facilitate setting up the body mapping template. In addition to the functionality listed above, API Gateway REST APIs also provide additional management functionality such as: @@ -57,7 +61,7 @@ The [Use API Gateway REST API vs HTTP API](/resources/adrs/adopted/use-api-gatew ### WebSocket API :::info - Choose a WebSocket API to push results to your clients. Note, consider mixing and matching REST APIs and WebSocket APIs.
See [https://aws.amazon.com/blogs/compute/from-poll-to-push-transform-apis-using-amazon-api-gateway-rest-apis-and-websockets/](https://aws.amazon.com/blogs/compute/from-poll-to-push-transform-apis-using-amazon-api-gateway-rest-apis-and-websockets/) +Choose a WebSocket API to push results to your clients. Note, consider mixing and matching REST APIs and WebSocket APIs. See [https://aws.amazon.com/blogs/compute/from-poll-to-push-transform-apis-using-amazon-api-gateway-rest-apis-and-websockets/](https://aws.amazon.com/blogs/compute/from-poll-to-push-transform-apis-using-amazon-api-gateway-rest-apis-and-websockets/) ::: @@ -110,6 +114,3 @@ See also:[Decide on WAF Requirements/Strategy](/layers/security-and-compliance/d - [https://aws.amazon.com/blogs/compute/from-poll-to-push-transform-apis-using-amazon-api-gateway-rest-apis-and-websockets/](https://aws.amazon.com/blogs/compute/from-poll-to-push-transform-apis-using-amazon-api-gateway-rest-apis-and-websockets/) - [https://docs.aws.amazon.com/apigateway/latest/developerguide/http-api-vs-rest.html](https://docs.aws.amazon.com/apigateway/latest/developerguide/http-api-vs-rest.html) - - - diff --git a/docs/resources/legacy/design-decisions/decide-on-cloudfront-requirements.mdx b/docs/resources/legacy/design-decisions/decide-on-cloudfront-requirements.mdx index 310d9a0a8..106c0ce11 100644 --- a/docs/resources/legacy/design-decisions/decide-on-cloudfront-requirements.mdx +++ b/docs/resources/legacy/design-decisions/decide-on-cloudfront-requirements.mdx @@ -3,9 +3,12 @@ title: "Decide on CloudFront Requirements" sidebar_label: "CloudFront Requirements" sidebar_position: 100 refarch_id: REFARCH-530 +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Context and Problem Statement @@ -21,9 +24,8 @@ CloudFront also offers the ability to 
limit access to content based on the geogr In order to implement your CDN strategy with CloudFront, we’ll need to know a little bit more about how you currently use it and how you intend to use it. Any combination of one or more of the options below is supported but will need to be implemented differently. Keep in mind, as soon as you introduce a CDN, you have to solve the cache invalidation problem. -> _There are only two hard things in Computer Science: cache invalidation and naming things._ -> _-- Phil Karlton_ -[https://www.karlton.org/2017/12/naming-things-hard/](https://www.karlton.org/2017/12/naming-things-hard/) +> _There are only two hard things in Computer Science: cache invalidation and naming things._ _-- Phil Karlton_ +> [https://www.karlton.org/2017/12/naming-things-hard/](https://www.karlton.org/2017/12/naming-things-hard/) ### Option 1: Origin Acceleration for Dynamic Content @@ -39,7 +41,7 @@ If you intend to also serve static assets behind a CDN, see _Option 2._ - Reduce the compute capacity required to serve cacheable content - Improve SEO -[https://www.semrush.com/blog/how-fast-is-fast-enough-page-load-time-and-your-bottom-line/](https://www.semrush.com/blog/how-fast-is-fast-enough-page-load-time-and-your-bottom-line/) + [https://www.semrush.com/blog/how-fast-is-fast-enough-page-load-time-and-your-bottom-line/](https://www.semrush.com/blog/how-fast-is-fast-enough-page-load-time-and-your-bottom-line/) - Relatively easy to deploy (effectiveness will depend on cache rules and cachabilty of content) @@ -51,7 +53,7 @@ If you intend to also serve static assets behind a CDN, see _Option 2._ - Conceal your origin servers from attackers -- More easily mitigate DoS/DDoS attacks +- More easily mitigate DoS/DDoS attacks #### Cons @@ -115,7 +117,7 @@ Common use-cases include Website Security & Privacy, Dynamics server-side applic #### Cons - There are tons of restrictions on Edge lambda functions intended to ensure they are ultra-performant and do not tax
edge locations. Make sure you’re aware of them. -[https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/edge-functions-restrictions.html](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/edge-functions-restrictions.html) + [https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/edge-functions-restrictions.html](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/edge-functions-restrictions.html) - Websites that rely on edge content manipulation will require some compensating controls for local development (harder to test and debug) @@ -124,9 +126,7 @@ Common use-cases include Website Security & Privacy, Dynamics server-side applic ## References - **Lambda@Edge Design Best Practices** -[https://aws.amazon.com/blogs/networking-and-content-delivery/lambdaedge-design-best-practices/](https://aws.amazon.com/blogs/networking-and-content-delivery/lambdaedge-design-best-practices/) + [https://aws.amazon.com/blogs/networking-and-content-delivery/lambdaedge-design-best-practices/](https://aws.amazon.com/blogs/networking-and-content-delivery/lambdaedge-design-best-practices/) - **Service Quotas (Limitations)** -[https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/cloudfront-limits.html](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/cloudfront-limits.html) - - + [https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/cloudfront-limits.html](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/cloudfront-limits.html) diff --git a/docs/resources/legacy/design-decisions/decide-on-cognito-requirements.mdx b/docs/resources/legacy/design-decisions/decide-on-cognito-requirements.mdx index 561ffd680..e71126617 100644 --- a/docs/resources/legacy/design-decisions/decide-on-cognito-requirements.mdx +++ b/docs/resources/legacy/design-decisions/decide-on-cognito-requirements.mdx @@ -3,9 +3,12 @@ title: "Decide on Cognito Requirements" sidebar_label: "Cognito Requirements" 
sidebar_position: 100 refarch_id: REFARCH-525 +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Overview @@ -53,5 +56,3 @@ We have seen customers take two approaches to migration: ### User Pools & Identity Providers Before implementing Cognito, we need to document the User Pool(s) you would like to provision along with any Identity Providers you would like to use. - - diff --git a/docs/resources/legacy/design-decisions/decide-on-iam-roles-for-github-action-runners.mdx b/docs/resources/legacy/design-decisions/decide-on-iam-roles-for-github-action-runners.mdx index 4bb5efd98..d18a38dec 100644 --- a/docs/resources/legacy/design-decisions/decide-on-iam-roles-for-github-action-runners.mdx +++ b/docs/resources/legacy/design-decisions/decide-on-iam-roles-for-github-action-runners.mdx @@ -3,9 +3,12 @@ title: "Decide on IAM Roles for GitHub Action Runners" sidebar_label: "IAM Roles for GitHub Action Runners" sidebar_position: 100 refarch_id: REFARCH-305 +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Problem @@ -21,6 +24,7 @@ The trust relationship of this IAM role depends on how the Runners are hosted: Regardless of the trust relationship, the IAM roles themselves need to be defined.GitHub Action Workflows regularly use third-party actions. These actions execute on the self-hosted runners and have whatever access the runners have, including VPC connectivity and IAM permissions. ## Considered Options + Some possible configurations (and combinations thereof) include: - A Runner without any IAM roles (e.g. for unit tests in a CI pipeline). 
@@ -39,5 +43,3 @@ Some possible configurations (and combinations thereof) include: - [Decide on Self-Hosted GitHub Runner Strategy](/layers/software-delivery/design-decisions/decide-on-self-hosted-github-runner-strategy) - [Decide on Strategy for Continuous Integration](/layers/software-delivery/design-decisions/decide-on-strategy-for-continuous-integration) - [Decide on GitHub Actions Workflow Organization Strategy](/layers/software-delivery/design-decisions/decide-on-github-actions-workflow-organization-strategy) - - diff --git a/docs/resources/legacy/design-decisions/decide-on-kinesis-requirements.mdx b/docs/resources/legacy/design-decisions/decide-on-kinesis-requirements.mdx index 4d264dfa2..b636abb45 100644 --- a/docs/resources/legacy/design-decisions/decide-on-kinesis-requirements.mdx +++ b/docs/resources/legacy/design-decisions/decide-on-kinesis-requirements.mdx @@ -3,9 +3,12 @@ title: "Decide on Kinesis Requirements" sidebar_label: "Kinesis Requirements" sidebar_position: 100 refarch_id: REFARCH-527 +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Context and Problem Statement @@ -24,20 +27,22 @@ We have terraform support for [https://github.com/cloudposse/terraform-aws-kines We have previously implemented data firehose for customer-specific applications, but do not have a generalized component for this. ### Option 1: AWS Kinesis Data Streams + In order to provision the kinesis streams component, we’ll need to know more about how it will be used. -| | | | -| ----- | ----- | ----- | -|**Name(s) of the Streams** | What are the names of the streams? or just provide some examples | | -|**Region** | AWS Region for the cluster | | -|**Number of Shards** | The number of shards to provision for the stream. 
| | -|**Retention Period** | Length of time data records are accessible after they are added to the stream. The maximum value is 168 hours. Minimum value is 24. | | -|**Shard Level Metrics** | A list of shard-level CloudWatch metrics to enabled for the stream. Options are IncomingBytes, OutgoingBytes | | -|**Enforce Consumer Deletion** | Forcefully delete stream consumers before destroying the stream | | -|**Encryption Type** | The encryption type to use. Acceptable values are `NONE` and `KMS` | | -|**Steaming Mode** | The capacity mode of the stream. Must be either `PROVISIONED` or `ON_DEMAND`. | | +| | | | +| ----------------------------- | ----------------------------------------------------------------------------------------------------------------------------------- | --- | +| **Name(s) of the Streams** | What are the names of the streams? or just provide some examples | | +| **Region** | AWS Region for the cluster | | +| **Number of Shards** | The number of shards to provision for the stream. | | +| **Retention Period** | Length of time data records are accessible after they are added to the stream. The maximum value is 168 hours. Minimum value is 24. | | +| **Shard Level Metrics** | A list of shard-level CloudWatch metrics to enable for the stream. Options are IncomingBytes, OutgoingBytes | | +| **Enforce Consumer Deletion** | Forcefully delete stream consumers before destroying the stream | | +| **Encryption Type** | The encryption type to use. Acceptable values are `NONE` and `KMS` | | +| **Streaming Mode** | The capacity mode of the stream. Must be either `PROVISIONED` or `ON_DEMAND`. | | ### Option 2: AWS Kinesis Data Firehose + We’ll need more information about how it will be used to provision the firehose. Implementing the component will likely be highly custom to your use case.
Standard use-cases are: @@ -55,8 +60,7 @@ Standard use-cases are: - HTTP Endpoint ## References + - [https://registry.terraform.io/providers/hashicorp/aws/latest/docs/reference/kinesis_stream](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/reference/kinesis_stream) - [https://registry.terraform.io/providers/hashicorp/aws/latest/docs/reference/kinesis_firehose_delivery_stream](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/reference/kinesis_firehose_delivery_stream) - - diff --git a/docs/resources/legacy/design-decisions/decide-on-kms-requirements.mdx b/docs/resources/legacy/design-decisions/decide-on-kms-requirements.mdx index 92ef444d4..25251e4d6 100644 --- a/docs/resources/legacy/design-decisions/decide-on-kms-requirements.mdx +++ b/docs/resources/legacy/design-decisions/decide-on-kms-requirements.mdx @@ -3,9 +3,12 @@ title: "Decide on KMS Requirements" sidebar_label: "KMS Requirements" sidebar_position: 100 refarch_id: REFARCH-532 +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; AWS Key Management Service (AWS KMS) makes it easy to create and manage cryptographic keys and control their use across various AWS services and in your applications. AWS KMS is a secure and resilient service that uses hardware security modules that have been validated under FIPS 140-2, or are in the process of being validated, to protect your keys. AWS KMS is integrated with AWS CloudTrail to provide you with logs of all key usage to help meet your regulatory and compliance needs. @@ -30,6 +33,7 @@ Nevertheless, because access to data is difficult to control, some certification KMS keys reside in AWS-managed hardware security modules that are physically tied to a single region. Each key can only be used in the region in which it was created. 
This can, in some sense, enforce Data Localization requirements, although it remains possible to decrypt data in the region and then transport the decrypted data anywhere on the internet. :::info + - **Data residency** refers to where a business, industry body or government specifies that their data is stored in a geographical location of their choice, usually for regulatory or policy reasons. - **Data sovereignty** refers to who has power over data. For example, data stored in Germany is subject to the laws of both Germany and the European Union. @@ -56,7 +60,7 @@ As documented, AWS owned keys are practically invisible to customers, so your on ### Key Identifiers -KMS keys can have a few different [identifiers](https://docs.aws.amazon.com/kms/latest/developerguide/concepts.html#key-id), usually called `KeyId`. When created, they are immediately given a _Key ID_ (not what is meant by `KeyId`) which uniquely identifies a KMS key within an account and Region. That Key ID, plus account and region info, constitutes a _Key ARN_, a unique, fully qualified identifier for the KMS key. Additionally, you can create an _Alias Name_ (actually, you can create several) that provides a friendly name for the key. Furthermore, that Alias Name can be associated with different keys at different points in time. More importantly, while you cannot control the Key ID when you create a customer managed key, you can control the value of the Alias Name for the key. This allows you to have the same Alias in every account and region, which then allows you to have the same configuration (referencing the Alias rather than the Key ID or ARN) in every account and every region even though they all point to different keys. Like a Key ID, an Alias can be used to constitute an Alias ARN, which is also a unique, fully qualified identifier for the alias, and for the KMS key, it represents.
+KMS keys can have a few different [identifiers](https://docs.aws.amazon.com/kms/latest/developerguide/concepts.html#key-id), usually called `KeyId`. When created, they are immediately given a _Key ID_ (not what is meant by `KeyId`) which uniquely identifies a KMS key within an account and Region. That Key ID, plus account and region info, constitutes a _Key ARN_, a unique, fully qualified identifier for the KMS key. Additionally, you can create an _Alias Name_ (actually, you can create several) that provides a friendly name for the key. Furthermore, that Alias Name can be associated with different keys at different points in time. More importantly, while you cannot control the Key ID when you create a customer managed key, you can control the value of the Alias Name for the key. This allows you to have the same Alias in every account and region, which then allows you to have the same configuration (referencing the Alias rather than the Key ID or ARN) in every account and every region even though they all point to different keys. Like a Key ID, an Alias can be used to constitute an Alias ARN, which is also a unique, fully qualified identifier for the alias, and for the KMS key it represents. AWS Managed keys are created automatically and are tied to a specific service, such as S3 or SSM, and are usually used by default when you specify encryption but do not specify a KMS key. They all have Alias Names beginning with `aws/` and are named according to the service or usage. For example `aws/s3` is used to encrypt S3 bucket data, while `aws/ebs` is used to encrypt EBS volumes. @@ -75,7 +79,7 @@ Every encrypted object in an S3 bucket is encrypted with a different data key.
T ## Considered Options -### Option 1: Use AWS Managed Keys (Recommended where applicable) +### Option 1: Use AWS Managed Keys (Recommended where applicable) :::tip The simplest solution is to use encryption keys as defense in depth, without attempting to leverage keys as additional access controls or monitoring opportunities. @@ -180,5 +184,3 @@ To verify data residency and data sovereignty with multi-Region keys, you must i - [Reducing the cost of SSE-KMS with Amazon S3 Bucket Keys](https://docs.aws.amazon.com/AmazonS3/latest/userguide/bucket-key.html) - [Deleting AWS KMS keys](https://docs.aws.amazon.com/kms/latest/developerguide/deleting-keys.html) - - diff --git a/docs/resources/legacy/design-decisions/decide-on-transactional-email-smtp-provider-for-operational-email.mdx b/docs/resources/legacy/design-decisions/decide-on-transactional-email-smtp-provider-for-operational-email.mdx index 12da0b81f..bd68d5a3d 100644 --- a/docs/resources/legacy/design-decisions/decide-on-transactional-email-smtp-provider-for-operational-email.mdx +++ b/docs/resources/legacy/design-decisions/decide-on-transactional-email-smtp-provider-for-operational-email.mdx @@ -3,9 +3,12 @@ title: "Decide on Transactional Email (SMTP) Provider for Operational Emails" sidebar_label: "Transactional Email (SMTP) Provider" sidebar_position: 100 refarch_id: REFARCH-79 +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; ## Problem @@ -51,5 +54,3 @@ We’ll happily integrate with whatever SMTP system your company uses today. Gmail is not a good option because they will rate limit sending. 
::: - - diff --git a/docs/resources/legacy/design-decisions/design-decisions.mdx b/docs/resources/legacy/design-decisions/design-decisions.mdx index 188280e1e..afe5c56fa 100644 --- a/docs/resources/legacy/design-decisions/design-decisions.mdx +++ b/docs/resources/legacy/design-decisions/design-decisions.mdx @@ -2,13 +2,18 @@ title: "Design Decisions" sidebar_label: "Design Decisions" sidebar_position: 200 +tags: + - design-decision --- -import Intro from '@site/src/components/Intro'; -import KeyPoints from '@site/src/components/KeyPoints'; -import DocCardList from '@theme/DocCardList' + +import Intro from "@site/src/components/Intro"; +import KeyPoints from "@site/src/components/KeyPoints"; +import DocCardList from "@theme/DocCardList"; -Design Decisions are architectural considerations for how to approach or implement some sort of functionality. The decision outcomes should be documented as Architectural Design Records (ADRs). + Design Decisions are architectural considerations for how to approach or + implement some sort of functionality. The decision outcomes should be + documented as Architectural Design Records (ADRs). See [how to document a new design decision](/learn/maintenance/tutorials/how-to-document-a-new-design-decision) to this reference architecture as well as [How to write ADRs](/learn/maintenance/tutorials/how-to-write-adrs). @@ -22,5 +27,4 @@ This is our entire reference catalog of Design Decisions and not all may be rele They are broken down in the following way: - - + diff --git a/docs/tags.yml b/docs/tags.yml index ea1cd459f..7c0e63506 100644 --- a/docs/tags.yml +++ b/docs/tags.yml @@ -145,6 +145,10 @@ datadog: description: Datadog monitoring platform. label: Datadog +design-decision: + description: Design decisions and considerations. + label: Design Decision + developer: description: Developer-related topics. label: Developer