From 283a161a1ba1da2e726ab60ddb3e7145224d7e0c Mon Sep 17 00:00:00 2001 From: Dan Miller Date: Tue, 21 Nov 2023 06:42:05 -0800 Subject: [PATCH] Support `karpenter-crd` Helm Chart and Fix Node Interruption Handling (#868) --- modules/eks/karpenter/CHANGELOG.md | 66 ++++++ modules/eks/karpenter/README.md | 56 ++++- modules/eks/karpenter/interruption_handler.tf | 6 +- modules/eks/karpenter/karpenter-crd-upgrade | 12 +- modules/eks/karpenter/main.tf | 199 ++++++++++++------ modules/eks/karpenter/variables.tf | 12 ++ 6 files changed, 266 insertions(+), 85 deletions(-) create mode 100644 modules/eks/karpenter/CHANGELOG.md diff --git a/modules/eks/karpenter/CHANGELOG.md b/modules/eks/karpenter/CHANGELOG.md new file mode 100644 index 000000000..14dd2a56b --- /dev/null +++ b/modules/eks/karpenter/CHANGELOG.md @@ -0,0 +1,66 @@ +## Version 1.348.0 + +Components PR [#868](https://github.com/cloudposse/terraform-aws-components/pull/868) + +The `karpenter-crd` helm chart can now be installed alongside the `karpenter` helm chart to automatically manage the lifecycle of Karpenter CRDs. However since this chart must be installed before the `karpenter` helm chart, the Kubernetes namespace must be available before either chart is deployed. Furthermore, this namespace should persist whether or not the `karpenter-crd` chart is deployed, so it should not be installed with that given `helm-release` resource. Therefore, we've moved namespace creation to a separate resource that runs before both charts. Terraform will handle that namespace state migration with the `moved` block. + +There are several scenarios that may or may not require additional steps. Please review the following scenarios and follow the steps for your given requirements. + +### Upgrading an existing `eks/karpenter` deployment without changes + +If you currently have `eks/karpenter` deployed to an EKS cluster and have upgraded to this version of the component, no changes are required. 
`var.crd_chart_enabled` will default to `false`. + +### Upgrading an existing `eks/karpenter` deployment and deploying the `karpenter-crd` chart + +If you currently have `eks/karpenter` deployed to an EKS cluster, have upgraded to this version of the component, do not currently have the `karpenter-crd` chart installed, and want to now deploy the `karpenter-crd` helm chart, a few additional steps are required! + +First, set `var.crd_chart_enabled` to `true`. + +Next, update the installed Karpenter CRDs in order for Helm to automatically take over their management when the `karpenter-crd` chart is deployed. We have included a script to run that upgrade. Run the `./karpenter-crd-upgrade` script or run the following commands on the given cluster before deploying the chart. Please note that this script or commands will only need to be run on first use of the CRD chart. + +Before running the script, ensure that the `kubectl` context is set to the cluster where the `karpenter` helm chart is deployed. In Geodesic, you can usually do this with the `set-cluster` command, though your configuration may vary. + +```bash +set-cluster -- terraform +``` + +Then run the script or commands: + +```bash +kubectl label crd awsnodetemplates.karpenter.k8s.aws provisioners.karpenter.sh app.kubernetes.io/managed-by=Helm --overwrite +kubectl annotate crd awsnodetemplates.karpenter.k8s.aws provisioners.karpenter.sh meta.helm.sh/release-name=karpenter-crd --overwrite +kubectl annotate crd awsnodetemplates.karpenter.k8s.aws provisioners.karpenter.sh meta.helm.sh/release-namespace=karpenter --overwrite +``` + +:::info + +Previously the `karpenter-crd-upgrade` script included deploying the `karpenter-crd` chart. Now that this chart is moved to Terraform, that helm deployment is no longer necessary. 
+ +For reference, the `karpenter-crd` chart can be installed with helm with the following: +```bash +helm upgrade --install karpenter-crd oci://public.ecr.aws/karpenter/karpenter-crd --version "$VERSION" --namespace karpenter +``` + +::: + +Now that the CRDs are upgraded, the component is ready to be applied. Apply the `eks/karpenter` component and then apply `eks/karpenter-provisioner`. + +#### Note for upgrading Karpenter from before v0.27.3 to v0.27.3 or later + +If you are upgrading Karpenter from before v0.27.3 to v0.27.3 or later, +you may need to run the following command to remove an obsolete webhook: + +```bash +kubectl delete mutatingwebhookconfigurations defaulting.webhook.karpenter.sh +``` + +See [the Karpenter upgrade guide](https://karpenter.sh/v0.32/upgrading/upgrade-guide/#upgrading-to-v0273) +for more details. + +### Upgrading an existing `eks/karpenter` deployment where the `karpenter-crd` chart is already deployed + +If you currently have `eks/karpenter` deployed to an EKS cluster, have upgraded to this version of the component, and already have the `karpenter-crd` chart installed, simply set `var.crd_chart_enabled` to `true` and redeploy Terraform to have Terraform manage the helm release for `karpenter-crd`. + +### Net new deployments + +If you are initially deploying `eks/karpenter`, no changes are required, but we recommend installing the CRD chart. Set `var.crd_chart_enabled` to `true` and continue with deployment. 
diff --git a/modules/eks/karpenter/README.md b/modules/eks/karpenter/README.md index c3c9b6fe2..b4be40954 100644 --- a/modules/eks/karpenter/README.md +++ b/modules/eks/karpenter/README.md @@ -21,19 +21,14 @@ components: eks/karpenter: metadata: type: abstract - settings: - spacelift: - workspace_enabled: true vars: enabled: true - tags: - Team: sre - Service: karpenter - eks_component_name: eks/cluster + eks_component_name: "eks/cluster" name: "karpenter" + # https://github.com/aws/karpenter/tree/main/charts/karpenter + chart_repository: "oci://public.ecr.aws/karpenter" chart: "karpenter" - chart_repository: "https://charts.karpenter.sh" - chart_version: "v0.16.3" + chart_version: "v0.31.0" create_namespace: true kubernetes_namespace: "karpenter" resources: @@ -47,9 +42,14 @@ components: atomic: true wait: true rbac_enabled: true + # "karpenter-crd" can be installed as an independent helm chart to manage the lifecycle of Karpenter CRDs + crd_chart_enabled: true + crd_chart: "karpenter-crd" # Set `legacy_create_karpenter_instance_profile` to `false` to allow the `eks/cluster` component # to manage the instance profile for the nodes launched by Karpenter (recommended for all new clusters). legacy_create_karpenter_instance_profile: false + # Enable interruption handling to deploy an SQS queue and a set of Event Bridge rules to handle interruption with Karpenter. + interruption_handler_enabled: true # Provision `karpenter` component on the blue EKS cluster eks/karpenter-blue: @@ -281,6 +281,37 @@ For your cluster, you will need to review the following configurations for the K ttl_seconds_until_expired: 2592000 ``` +## Node Interruption + +Karpenter also supports listening for and responding to Node Interruption events. If interruption handling is enabled, Karpenter will watch for upcoming involuntary interruption events that would cause disruption to your workloads. 
These interruption events include: + +- Spot Interruption Warnings +- Scheduled Change Health Events (Maintenance Events) +- Instance Terminating Events +- Instance Stopping Events + +:::info + +The Node Interruption Handler is not the same as the Node Termination Handler. The latter is always enabled and cleanly shuts down the node in 2 minutes in response to a Node Termination event. The former gets advance notice that a node will soon be terminated, so it can have 5-10 minutes to shut down a node. + +::: + +For more details, refer to the [Karpenter docs](https://karpenter.sh/v0.32/concepts/disruption/#interruption) and [FAQ](https://karpenter.sh/v0.32/faq/#interruption-handling). + +To enable Node Interruption handling, set `var.interruption_handler_enabled` to `true`. This will create an SQS queue and a set of Event Bridge rules to deliver interruption events to Karpenter. + +## Custom Resource Definition (CRD) Management + +Karpenter ships with a few Custom Resource Definitions (CRDs). In earlier versions +of this component, when installing a new version of the `karpenter` helm chart, CRDs +were not upgraded at the same time, requiring manual steps to upgrade CRDs after deploying the latest chart. +However, Karpenter now supports an additional, independent helm chart for CRD management. +This helm chart, `karpenter-crd`, can be installed alongside the `karpenter` helm chart to automatically manage the lifecycle of these CRDs. + +To deploy the `karpenter-crd` helm chart, set `var.crd_chart_enabled` to `true`. +(Installing the `karpenter-crd` chart is recommended. `var.crd_chart_enabled` defaults +to `false` to preserve backward compatibility with older versions of this component.) 
+ ## Troubleshooting For Karpenter issues, checkout the [Karpenter Troubleshooting Guide](https://karpenter.sh/docs/troubleshooting/) @@ -312,6 +343,7 @@ For more details, refer to: | Name | Version | |------|---------| | [aws](#provider\_aws) | >= 4.9.0 | +| [kubernetes](#provider\_kubernetes) | >= 2.7.1, != 2.21.0 | ## Modules @@ -319,7 +351,8 @@ For more details, refer to: |------|--------|---------| | [eks](#module\_eks) | cloudposse/stack-config/yaml//modules/remote-state | 1.5.0 | | [iam\_roles](#module\_iam\_roles) | ../../account-map/modules/iam-roles | n/a | -| [karpenter](#module\_karpenter) | cloudposse/helm-release/aws | 0.10.0 | +| [karpenter](#module\_karpenter) | cloudposse/helm-release/aws | 0.10.1 | +| [karpenter\_crd](#module\_karpenter\_crd) | cloudposse/helm-release/aws | 0.10.1 | | [this](#module\_this) | cloudposse/label/null | 0.25.0 | ## Resources @@ -331,6 +364,7 @@ For more details, refer to: | [aws_iam_instance_profile.default](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_instance_profile) | resource | | [aws_sqs_queue.interruption_handler](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/sqs_queue) | resource | | [aws_sqs_queue_policy.interruption_handler](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/sqs_queue_policy) | resource | +| [kubernetes_namespace.default](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/namespace) | resource | | [aws_eks_cluster_auth.eks](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/eks_cluster_auth) | data source | | [aws_iam_policy_document.interruption_handler](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | | [aws_partition.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/partition) | data source | @@ -349,6 +383,8 @@ For more details, refer to: 
| [chart\_version](#input\_chart\_version) | Specify the exact chart version to install. If this is not specified, the latest version is installed | `string` | `null` | no | | [cleanup\_on\_fail](#input\_cleanup\_on\_fail) | Allow deletion of new resources created in this upgrade when upgrade fails | `bool` | `true` | no | | [context](#input\_context) | Single object for setting entire context at once.
See description of individual variables for details.
Leave string and numeric variables as `null` to use default value.
Individual variable settings (non-null) override settings in context object,
except for attributes, tags, and additional\_tag\_map, which are merged. | `any` |
{
"additional_tag_map": {},
"attributes": [],
"delimiter": null,
"descriptor_formats": {},
"enabled": true,
"environment": null,
"id_length_limit": null,
"label_key_case": null,
"label_order": [],
"label_value_case": null,
"labels_as_tags": [
"unset"
],
"name": null,
"namespace": null,
"regex_replace_chars": null,
"stage": null,
"tags": {},
"tenant": null
}
| no | +| [crd\_chart](#input\_crd\_chart) | The name of the Karpenter CRD chart to be installed, if `var.crd_chart_enabled` is set to `true`. | `string` | `"karpenter-crd"` | no | +| [crd\_chart\_enabled](#input\_crd\_chart\_enabled) | `karpenter-crd` can be installed as an independent helm chart to manage the lifecycle of Karpenter CRDs. Set to `true` to install this CRD helm chart before the primary karpenter chart. | `bool` | `false` | no | | [create\_namespace](#input\_create\_namespace) | Create the namespace if it does not yet exist. Defaults to `false` | `bool` | `null` | no | | [delimiter](#input\_delimiter) | Delimiter to be used between ID elements.
Defaults to `-` (hyphen). Set to `""` to use no delimiter at all. | `string` | `null` | no | | [descriptor\_formats](#input\_descriptor\_formats) | Describe additional descriptors to be output in the `descriptors` output map.
Map of maps. Keys are names of descriptors. Values are maps of the form
`{
format = string
labels = list(string)
}`
(Type is `any` so the map values can later be enhanced to provide additional options.)
`format` is a Terraform format string to be passed to the `format()` function.
`labels` is a list of labels, in order, to pass to `format()` function.
Label values will be normalized before being passed to `format()` so they will be
identical to how they appear in `id`.
Default is `{}` (`descriptors` output will be empty). | `any` | `{}` | no | diff --git a/modules/eks/karpenter/interruption_handler.tf b/modules/eks/karpenter/interruption_handler.tf index 3f173b98e..558ee7de1 100644 --- a/modules/eks/karpenter/interruption_handler.tf +++ b/modules/eks/karpenter/interruption_handler.tf @@ -2,7 +2,7 @@ locals { interruption_handler_enabled = local.enabled && var.interruption_handler_enabled interruption_handler_queue_name = module.this.id - dns_suffix = data.aws_partition.current.dns_suffix + dns_suffix = join("", data.aws_partition.current[*].dns_suffix) events = { health_event = { @@ -40,7 +40,9 @@ locals { } } -data "aws_partition" "current" {} +data "aws_partition" "current" { + count = local.interruption_handler_enabled ? 1 : 0 +} resource "aws_sqs_queue" "interruption_handler" { count = local.interruption_handler_enabled ? 1 : 0 diff --git a/modules/eks/karpenter/karpenter-crd-upgrade b/modules/eks/karpenter/karpenter-crd-upgrade index a3e3ce05c..e6274deb3 100755 --- a/modules/eks/karpenter/karpenter-crd-upgrade +++ b/modules/eks/karpenter/karpenter-crd-upgrade @@ -2,27 +2,23 @@ function usage() { cat >&2 <<'EOF' -./karpenter-crd-upgrade +./karpenter-crd-upgrade -Use this script to upgrade the Karpenter CRDs by installing or upgrading the karpenter-crd helm chart. +Use this script to prepare a cluster for karpenter-crd helm chart support by upgrading Karpenter CRDs. 
EOF } function upgrade() { - VERSION="${1}" - [[ $VERSION =~ ^v ]] || VERSION="v${VERSION}" - set -x kubectl label crd awsnodetemplates.karpenter.k8s.aws provisioners.karpenter.sh app.kubernetes.io/managed-by=Helm --overwrite kubectl annotate crd awsnodetemplates.karpenter.k8s.aws provisioners.karpenter.sh meta.helm.sh/release-name=karpenter-crd --overwrite kubectl annotate crd awsnodetemplates.karpenter.k8s.aws provisioners.karpenter.sh meta.helm.sh/release-namespace=karpenter --overwrite - helm upgrade --install karpenter-crd oci://public.ecr.aws/karpenter/karpenter-crd --version "$VERSION" --namespace karpenter } if (($# == 0)); then - usage + upgrade else - upgrade $1 + usage fi diff --git a/modules/eks/karpenter/main.tf b/modules/eks/karpenter/main.tf index e20f7011f..1ebf263c4 100644 --- a/modules/eks/karpenter/main.tf +++ b/modules/eks/karpenter/main.tf @@ -25,10 +25,72 @@ resource "aws_iam_instance_profile" "default" { tags = module.this.tags } +# See CHANGELOG for PR #868: +# https://github.com/cloudposse/terraform-aws-components/pull/868 +# +# Namespace was moved from the karpenter module to an independent resource in order to be +# shared between both the karpenter and karpenter-crd modules. +moved { + from = module.karpenter.kubernetes_namespace.default[0] + to = kubernetes_namespace.default[0] +} + +resource "kubernetes_namespace" "default" { + count = local.enabled && var.create_namespace ? 
1 : 0 + + metadata { + name = var.kubernetes_namespace + annotations = {} + labels = merge(module.this.tags, { name = var.kubernetes_namespace }) + } +} + +# Deploy karpenter-crd helm chart +# "karpenter-crd" can be installed as an independent helm chart to manage the lifecycle of Karpenter CRDs +module "karpenter_crd" { + enabled = local.enabled && var.crd_chart_enabled + + source = "cloudposse/helm-release/aws" + version = "0.10.1" + + name = var.crd_chart + chart = var.crd_chart + repository = var.chart_repository + description = var.chart_description + chart_version = var.chart_version + wait = var.wait + atomic = var.atomic + cleanup_on_fail = var.cleanup_on_fail + timeout = var.timeout + + create_namespace_with_kubernetes = false # Namespace is created with kubernetes_namespace resources to be shared between charts + kubernetes_namespace = join("", kubernetes_namespace.default[*].id) + kubernetes_namespace_labels = merge(module.this.tags, { name = join("", kubernetes_namespace.default[*].id) }) + + eks_cluster_oidc_issuer_url = coalesce(replace(local.eks_cluster_identity_oidc_issuer, "https://", ""), "deleted") + + values = compact([ + # standard k8s object settings + yamlencode({ + fullnameOverride = module.this.name + resources = var.resources + rbac = { + create = var.rbac_enabled + } + }), + ]) + + context = module.this.context + + depends_on = [ + kubernetes_namespace.default + ] +} + # Deploy Karpenter helm chart module "karpenter" { source = "cloudposse/helm-release/aws" - version = "0.10.0" + version = "0.10.1" chart = var.chart repository = var.chart_repository @@ -39,14 +101,14 @@ module "karpenter" { cleanup_on_fail = var.cleanup_on_fail timeout = var.timeout - create_namespace_with_kubernetes = var.create_namespace - kubernetes_namespace = var.kubernetes_namespace - kubernetes_namespace_labels = merge(module.this.tags, { name = var.kubernetes_namespace }) + create_namespace_with_kubernetes = false # Namespace is created with kubernetes_namespace 
resources to be shared between charts + kubernetes_namespace = join("", kubernetes_namespace.default[*].id) + kubernetes_namespace_labels = merge(module.this.tags, { name = join("", kubernetes_namespace.default[*].id) }) eks_cluster_oidc_issuer_url = coalesce(replace(local.eks_cluster_identity_oidc_issuer, "https://", ""), "deleted") service_account_name = module.this.name - service_account_namespace = var.kubernetes_namespace + service_account_namespace = join("", kubernetes_namespace.default[*].id) iam_role_enabled = true @@ -55,72 +117,75 @@ module "karpenter" { # https://github.com/aws/karpenter/issues/2649 # Apparently the source of truth for the best IAM policy is the `data.aws_iam_policy_document.karpenter_controller` in # https://github.com/terraform-aws-modules/terraform-aws-iam/blob/master/modules/iam-role-for-service-accounts-eks/policies.tf - iam_policy_statements = concat([ - { - sid = "KarpenterController" - effect = "Allow" - resources = ["*"] - - actions = [ - # https://github.com/terraform-aws-modules/terraform-aws-iam/blob/99c69ad54d985f67acf211885aa214a3a6cc931c/modules/iam-role-for-service-accounts-eks/policies.tf#L511-L581 - # The reference policy is broken up into multiple statements with different resource restrictions based on tags. - # This list has breaks where statements are separated in the reference policy for easier comparison and maintenance. 
- "ec2:CreateLaunchTemplate", - "ec2:CreateFleet", - "ec2:CreateTags", - "ec2:DescribeLaunchTemplates", - "ec2:DescribeImages", - "ec2:DescribeInstances", - "ec2:DescribeSecurityGroups", - "ec2:DescribeSubnets", - "ec2:DescribeInstanceTypes", - "ec2:DescribeInstanceTypeOfferings", - "ec2:DescribeAvailabilityZones", - "ec2:DescribeSpotPriceHistory", - "pricing:GetProducts", - - "ec2:TerminateInstances", - "ec2:DeleteLaunchTemplate", - - "ec2:RunInstances", - - "iam:PassRole", - ] - }, - { - sid = "KarpenterControllerSSM" - effect = "Allow" - # Allow Karpenter to read AMI IDs from SSM - actions = ["ssm:GetParameter"] - resources = ["arn:aws:ssm:*:*:parameter/aws/service/*"] - }, - { - sid = "KarpenterControllerClusterAccess" - effect = "Allow" - actions = [ - "eks:DescribeCluster" - ] - resources = [ - module.eks.outputs.eks_cluster_arn - ] - } - ], - local.interruption_handler_enabled ? [ + iam_policy = [{ + statements = concat([ + { + sid = "KarpenterController" + effect = "Allow" + resources = ["*"] + + actions = [ + # https://github.com/terraform-aws-modules/terraform-aws-iam/blob/99c69ad54d985f67acf211885aa214a3a6cc931c/modules/iam-role-for-service-accounts-eks/policies.tf#L511-L581 + # The reference policy is broken up into multiple statements with different resource restrictions based on tags. + # This list has breaks where statements are separated in the reference policy for easier comparison and maintenance. 
+ "ec2:CreateLaunchTemplate", + "ec2:CreateFleet", + "ec2:CreateTags", + "ec2:DescribeLaunchTemplates", + "ec2:DescribeImages", + "ec2:DescribeInstances", + "ec2:DescribeSecurityGroups", + "ec2:DescribeSubnets", + "ec2:DescribeInstanceTypes", + "ec2:DescribeInstanceTypeOfferings", + "ec2:DescribeAvailabilityZones", + "ec2:DescribeSpotPriceHistory", + "pricing:GetProducts", + + "ec2:TerminateInstances", + "ec2:DeleteLaunchTemplate", + + "ec2:RunInstances", + + "iam:PassRole", + ] + }, + { + sid = "KarpenterControllerSSM" + effect = "Allow" + # Allow Karpenter to read AMI IDs from SSM + actions = ["ssm:GetParameter"] + resources = ["arn:aws:ssm:*:*:parameter/aws/service/*"] + }, { - sid = "KarpenterInterruptionHandlerAccess" + sid = "KarpenterControllerClusterAccess" effect = "Allow" actions = [ - "sqs:DeleteMessage", - "sqs:GetQueueUrl", - "sqs:GetQueueAttributes", - "sqs:ReceiveMessage", + "eks:DescribeCluster" ] resources = [ - aws_sqs_queue.interruption_handler[0].arn + module.eks.outputs.eks_cluster_arn ] } - ] : [] - ) + ], + local.interruption_handler_enabled ? 
[ + { + sid = "KarpenterInterruptionHandlerAccess" + effect = "Allow" + actions = [ + "sqs:DeleteMessage", + "sqs:GetQueueUrl", + "sqs:GetQueueAttributes", + "sqs:ReceiveMessage", + ] + resources = [ + one(aws_sqs_queue.interruption_handler[*].arn) + ] + } + ] : [] + ) + }] + values = compact([ # standard k8s object settings @@ -163,5 +228,9 @@ module "karpenter" { context = module.this.context - depends_on = [aws_iam_instance_profile.default] + depends_on = [ + aws_iam_instance_profile.default, + module.karpenter_crd, + kubernetes_namespace.default + ] } diff --git a/modules/eks/karpenter/variables.tf b/modules/eks/karpenter/variables.tf index 8b366c557..9b84ba3b4 100644 --- a/modules/eks/karpenter/variables.tf +++ b/modules/eks/karpenter/variables.tf @@ -25,6 +25,18 @@ variable "chart_version" { default = null } +variable "crd_chart_enabled" { + type = bool + description = "`karpenter-crd` can be installed as an independent helm chart to manage the lifecycle of Karpenter CRDs. Set to `true` to install this CRD helm chart before the primary karpenter chart." + default = false +} + +variable "crd_chart" { + type = string + description = "The name of the Karpenter CRD chart to be installed, if `var.crd_chart_enabled` is set to `true`." + default = "karpenter-crd" +} + variable "resources" { type = object({ limits = object({