Skip to content

Commit a2bc4fe

Browse files
authored
Backport cluster critical fargate profiles + karpenter module (#352)
* Backport fargate profile for cluster-critical addons. This feature is backported to 1.24 to ease the upgrade process to 1.25. By upgrading to the latest 1.24 module version we can add the fargate profile before the cluster critical autoscaling group is removed. * Backport karpenter module to 1.24. Allows karpenter to be installed or updated before upgrading a cluster to 1.25.
1 parent 11c3125 commit a2bc4fe

File tree

12 files changed

+532
-8
lines changed

12 files changed

+532
-8
lines changed

modules/cluster/fargate.tf

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# Fargate profile on the aws_eks_cluster.control_plane cluster for the
# namespaces listed in var.fargate_namespaces. Pods are run with the
# aws_iam_role.fargate execution role.
resource "aws_eks_fargate_profile" "critical_pods" {
2+
cluster_name = aws_eks_cluster.control_plane.name
3+
fargate_profile_name = "${var.name}-critical-pods"
4+
pod_execution_role_arn = aws_iam_role.fargate.arn
5+
# values() is applied, so only the values of private_subnet_ids are passed on.
subnet_ids = values(var.vpc_config.private_subnet_ids)
6+
7+
# One selector block is generated per entry in var.fargate_namespaces.
dynamic "selector" {
8+
for_each = var.fargate_namespaces
9+
10+
content {
11+
namespace = selector.value
12+
# Empty map: no label constraints are set on the selector.
labels = {}
13+
}
14+
}
15+
}
16+
17+
# IAM role used as pod_execution_role_arn by the critical_pods Fargate profile.
# Its name is var.iam_role_name_prefix + "Fargate-" + var.name, and its trust
# policy is rendered from data.aws_iam_policy_document.fargate_assume_role_policy.
resource "aws_iam_role" "fargate" {
18+
name = "${var.iam_role_name_prefix}Fargate-${var.name}"
19+
assume_role_policy = data.aws_iam_policy_document.fargate_assume_role_policy.json
20+
description = "Fargate execution role for pods on ${var.name} eks cluster"
21+
}
22+
23+
# Trust policy for the Fargate execution role: allows the
# eks-fargate-pods.amazonaws.com service principal to call sts:AssumeRole.
data "aws_iam_policy_document" "fargate_assume_role_policy" {
24+
statement {
25+
effect = "Allow"
26+
actions = ["sts:AssumeRole"]
27+
28+
principals {
29+
type = "Service"
30+
identifiers = ["eks-fargate-pods.amazonaws.com"]
31+
}
32+
}
33+
}
34+
35+
# Attaches each of the listed AWS managed policies to the Fargate execution
# role; for_each over the set creates one attachment per policy ARN.
resource "aws_iam_role_policy_attachment" "fargate_managed_policies" {
36+
for_each = toset([
37+
"arn:aws:iam::aws:policy/AmazonEKSFargatePodExecutionRolePolicy",
38+
"arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy",
39+
])
40+
41+
role = aws_iam_role.fargate.id
42+
# each.value is the policy ARN for this iteration.
policy_arn = each.value
43+
}
44+

modules/cluster/outputs.tf

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
11
# Cluster settings collected into a single `config` object.
# NOTE(review): presumably exposed as the module's `config` output (the output
# block is outside this hunk); the karpenter module reads name, arn and
# iam_role_name_prefix from it through var.cluster_config.
locals {
22
config = {
3-
name = aws_eks_cluster.control_plane.name
4-
endpoint = aws_eks_cluster.control_plane.endpoint
5-
ca_data = aws_eks_cluster.control_plane.certificate_authority[0].data
6-
vpc_id = var.vpc_config.vpc_id
7-
private_subnet_ids = var.vpc_config.private_subnet_ids
8-
node_security_group = aws_eks_cluster.control_plane.vpc_config.0.cluster_security_group_id
9-
node_instance_profile = var.iam_config.node_role
10-
tags = var.tags
3+
name = aws_eks_cluster.control_plane.name
4+
endpoint = aws_eks_cluster.control_plane.endpoint
5+
# New in this change: cluster ARN.
arn = aws_eks_cluster.control_plane.arn
6+
ca_data = aws_eks_cluster.control_plane.certificate_authority[0].data
7+
vpc_id = var.vpc_config.vpc_id
8+
private_subnet_ids = var.vpc_config.private_subnet_ids
9+
# NOTE(review): uses the legacy `.0` index form while ca_data above uses `[0]`;
# consider `vpc_config[0]` for consistency.
node_security_group = aws_eks_cluster.control_plane.vpc_config.0.cluster_security_group_id
10+
node_instance_profile = var.iam_config.node_role
11+
tags = var.tags
12+
# New in this change: IAM role name prefix and the Fargate execution role ARN.
iam_role_name_prefix = var.iam_role_name_prefix
13+
fargate_execution_role_arn = aws_iam_role.fargate.arn
1114
}
1215
}
1316

modules/cluster/variables.tf

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,3 +223,9 @@ variable "security_group_ids" {
223223
default = []
224224
description = "A list of security group IDs for the cross-account elastic network interfaces that Amazon EKS creates to use to allow communication with the Kubernetes control plane. *WARNING* changes to this list will cause the cluster to be recreated."
225225
}
226+
227+
# Namespaces that each get a selector in the critical_pods Fargate profile.
# NOTE(review): the type is set(string) although the description calls it a list.
variable "fargate_namespaces" {
228+
type = set(string)
229+
default = ["kube-system", "flux-system"]
230+
description = "A list of namespaces to create fargate profiles for, should be set to a list of namespaces critical for flux / cluster bootstrapping"
231+
}

modules/karpenter/README.md

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# Karpenter
2+
3+
This module configures the resources required to run the
4+
karpenter node-provisioning tool in an eks cluster.
5+
6+
* Fargate Profile - to run karpenter
7+
* IAM roles for the fargate controller and nodes to be provisioned by karpenter
8+
* SQS queue to provide events (spot interruption etc) to karpenter
9+
10+
It does not install karpenter itself to the cluster - and we recommend
11+
that you use helm as per the [karpenter documentation](https://karpenter.sh/docs/getting-started/getting-started-with-karpenter/#4-install-karpenter)
12+
13+
It is provided as a submodule so the core module is less opinionated.
14+
15+
However we test the core module and the karpenter module
16+
in our test suite to ensure that the different components we use in our
17+
clusters at Cookpad integrate correctly.
18+
19+
20+
## Example
21+
22+
You should pass cluster and oidc config from the cluster to the karpenter module.
23+
24+
You will also need to add the IAM role of nodes created by karpenter to the aws_auth_role_map
25+
so they can connect to the cluster.
26+
27+
```hcl
28+
module "cluster" {
29+
source = "cookpad/eks/aws"
30+
name = "hal-9000"
31+
vpc_config = module.vpc.config
32+
33+
aws_auth_role_map = [
34+
{
35+
username = "system:node:{{EC2PrivateDNSName}}"
36+
rolearn = module.karpenter.node_role_arn
37+
groups = [
38+
"system:bootstrappers",
39+
"system:nodes",
40+
]
41+
},
42+
]
43+
}
44+
45+
module "karpenter" {
46+
source = "cookpad/eks/aws//modules/karpenter"
47+
48+
cluster_config = module.cluster.config
49+
oidc_config = module.cluster.oidc_config
50+
}
51+
```

modules/karpenter/controller_iam.tf

Lines changed: 263 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,263 @@
1+
# IAM role for the Karpenter controller. The name is the cluster's IAM role
# name prefix + "Karpenter-" + the cluster name; the trust policy is rendered
# from karpenter_controller_assume_role_policy.
resource "aws_iam_role" "karpenter_controller" {
2+
name = "${var.cluster_config.iam_role_name_prefix}Karpenter-${var.cluster_config.name}"
3+
assume_role_policy = data.aws_iam_policy_document.karpenter_controller_assume_role_policy.json
4+
description = "Karpenter controller role for ${var.cluster_config.name} cluster"
5+
}
6+
7+
# Trust policy for the Karpenter controller role: allows
# sts:AssumeRoleWithWebIdentity for the federated principal var.oidc_config.arn,
# subject to the two conditions below.
data "aws_iam_policy_document" "karpenter_controller_assume_role_policy" {
8+
statement {
9+
actions = ["sts:AssumeRoleWithWebIdentity"]
10+
effect = "Allow"
11+
12+
# The token subject must be the `karpenter` service account in the
# `karpenter` namespace. The condition key is the OIDC URL with the
# "https://" prefix removed, followed by ":sub".
condition {
13+
test = "StringEquals"
14+
variable = "${replace(var.oidc_config.url, "https://", "")}:sub"
15+
values = ["system:serviceaccount:karpenter:karpenter"]
16+
}
17+
18+
# The token audience must be sts.amazonaws.com.
condition {
19+
test = "StringEquals"
20+
variable = "${replace(var.oidc_config.url, "https://", "")}:aud"
21+
values = ["sts.amazonaws.com"]
22+
}
23+
24+
principals {
25+
identifiers = [var.oidc_config.arn]
26+
type = "Federated"
27+
}
28+
}
29+
}
30+
31+
# Inline policy named "KarpenterController" on the controller role; the policy
# body is rendered from data.aws_iam_policy_document.karpenter_controller.
resource "aws_iam_role_policy" "karpenter_controller" {
32+
name = "KarpenterController"
33+
role = aws_iam_role.karpenter_controller.id
34+
policy = data.aws_iam_policy_document.karpenter_controller.json
35+
}
36+
37+
# Permissions granted to the Karpenter controller role through the inline
# KarpenterController policy. EC2 resource ARNs are built from the current
# partition and region data sources.
data "aws_iam_policy_document" "karpenter_controller" {
38+
# RunInstances / CreateFleet against images, snapshots, spot instance
# requests, security groups, subnets and launch templates in the current
# region. This statement has no tag condition.
statement {
39+
sid = "AllowScopedEC2InstanceActions"
40+
effect = "Allow"
41+
42+
# tfsec:ignore:aws-iam-no-policy-wildcards
43+
resources = [
44+
"arn:${data.aws_partition.current.partition}:ec2:${data.aws_region.current.name}::image/*",
45+
"arn:${data.aws_partition.current.partition}:ec2:${data.aws_region.current.name}::snapshot/*",
46+
"arn:${data.aws_partition.current.partition}:ec2:${data.aws_region.current.name}:*:spot-instances-request/*",
47+
"arn:${data.aws_partition.current.partition}:ec2:${data.aws_region.current.name}:*:security-group/*",
48+
"arn:${data.aws_partition.current.partition}:ec2:${data.aws_region.current.name}:*:subnet/*",
49+
"arn:${data.aws_partition.current.partition}:ec2:${data.aws_region.current.name}:*:launch-template/*",
50+
]
51+
52+
actions = [
53+
"ec2:RunInstances",
54+
"ec2:CreateFleet",
55+
]
56+
}
57+
58+
# RunInstances / CreateFleet / CreateLaunchTemplate on fleets, instances,
# volumes, network interfaces and launch templates, allowed only when the
# request tags kubernetes.io/cluster/<cluster name> = "owned" and carries a
# karpenter.sh/provisioner-name tag (any value).
statement {
59+
sid = "AllowScopedEC2InstanceActionsWithTags"
60+
effect = "Allow"
61+
62+
# tfsec:ignore:aws-iam-no-policy-wildcards
63+
resources = [
64+
"arn:${data.aws_partition.current.partition}:ec2:${data.aws_region.current.name}:*:fleet/*",
65+
"arn:${data.aws_partition.current.partition}:ec2:${data.aws_region.current.name}:*:instance/*",
66+
"arn:${data.aws_partition.current.partition}:ec2:${data.aws_region.current.name}:*:volume/*",
67+
"arn:${data.aws_partition.current.partition}:ec2:${data.aws_region.current.name}:*:network-interface/*",
68+
"arn:${data.aws_partition.current.partition}:ec2:${data.aws_region.current.name}:*:launch-template/*",
69+
]
70+
71+
actions = [
72+
"ec2:RunInstances",
73+
"ec2:CreateFleet",
74+
"ec2:CreateLaunchTemplate",
75+
]
76+
77+
condition {
78+
test = "StringEquals"
79+
variable = "aws:RequestTag/kubernetes.io/cluster/${var.cluster_config.name}"
80+
values = ["owned"]
81+
}
82+
83+
condition {
84+
test = "StringLike"
85+
variable = "aws:RequestTag/karpenter.sh/provisioner-name"
86+
values = ["*"]
87+
}
88+
}
89+
90+
# ec2:CreateTags on the same resource types, only when ec2:CreateAction is
# RunInstances, CreateFleet or CreateLaunchTemplate and the request carries
# the cluster ownership tag and a provisioner-name tag.
statement {
91+
sid = "AllowScopedResourceCreationTagging"
92+
effect = "Allow"
93+
94+
# tfsec:ignore:aws-iam-no-policy-wildcards
95+
resources = [
96+
"arn:${data.aws_partition.current.partition}:ec2:${data.aws_region.current.name}:*:fleet/*",
97+
"arn:${data.aws_partition.current.partition}:ec2:${data.aws_region.current.name}:*:instance/*",
98+
"arn:${data.aws_partition.current.partition}:ec2:${data.aws_region.current.name}:*:volume/*",
99+
"arn:${data.aws_partition.current.partition}:ec2:${data.aws_region.current.name}:*:network-interface/*",
100+
"arn:${data.aws_partition.current.partition}:ec2:${data.aws_region.current.name}:*:launch-template/*",
101+
]
102+
103+
actions = ["ec2:CreateTags"]
104+
105+
condition {
106+
test = "StringEquals"
107+
variable = "aws:RequestTag/kubernetes.io/cluster/${var.cluster_config.name}"
108+
values = ["owned"]
109+
}
110+
111+
condition {
112+
test = "StringEquals"
113+
variable = "ec2:CreateAction"
114+
115+
values = [
116+
"RunInstances",
117+
"CreateFleet",
118+
"CreateLaunchTemplate",
119+
]
120+
}
121+
122+
condition {
123+
test = "StringLike"
124+
variable = "aws:RequestTag/karpenter.sh/provisioner-name"
125+
values = ["*"]
126+
}
127+
}
128+
129+
# ec2:CreateTags on instances whose existing tags mark them as owned by this
# cluster. The request must set karpenter.sh/managed-by to the cluster name
# and include a provisioner-name tag; aws:TagKeys limits the request to
# those two tag keys.
statement {
130+
sid = "AllowMachineMigrationTagging"
131+
effect = "Allow"
132+
# tfsec:ignore:aws-iam-no-policy-wildcards
133+
resources = ["arn:${data.aws_partition.current.partition}:ec2:${data.aws_region.current.name}:*:instance/*"]
134+
actions = ["ec2:CreateTags"]
135+
136+
condition {
137+
test = "StringEquals"
138+
variable = "aws:ResourceTag/kubernetes.io/cluster/${var.cluster_config.name}"
139+
values = ["owned"]
140+
}
141+
142+
condition {
143+
test = "StringEquals"
144+
variable = "aws:RequestTag/karpenter.sh/managed-by"
145+
values = [var.cluster_config.name]
146+
}
147+
148+
condition {
149+
test = "StringLike"
150+
variable = "aws:RequestTag/karpenter.sh/provisioner-name"
151+
values = ["*"]
152+
}
153+
154+
condition {
155+
test = "ForAllValues:StringEquals"
156+
variable = "aws:TagKeys"
157+
158+
values = [
159+
"karpenter.sh/provisioner-name",
160+
"karpenter.sh/managed-by",
161+
]
162+
}
163+
}
164+
165+
# TerminateInstances / DeleteLaunchTemplate, only on resources that carry the
# cluster ownership tag and a karpenter.sh/provisioner-name tag.
statement {
166+
sid = "AllowScopedDeletion"
167+
effect = "Allow"
168+
169+
# tfsec:ignore:aws-iam-no-policy-wildcards
170+
resources = [
171+
"arn:${data.aws_partition.current.partition}:ec2:${data.aws_region.current.name}:*:instance/*",
172+
"arn:${data.aws_partition.current.partition}:ec2:${data.aws_region.current.name}:*:launch-template/*",
173+
]
174+
175+
actions = [
176+
"ec2:TerminateInstances",
177+
"ec2:DeleteLaunchTemplate",
178+
]
179+
180+
condition {
181+
test = "StringEquals"
182+
variable = "aws:ResourceTag/kubernetes.io/cluster/${var.cluster_config.name}"
183+
values = ["owned"]
184+
}
185+
186+
condition {
187+
test = "StringLike"
188+
variable = "aws:ResourceTag/karpenter.sh/provisioner-name"
189+
values = ["*"]
190+
}
191+
}
192+
193+
# EC2 Describe* calls on all resources, limited by aws:RequestedRegion to the
# current region.
statement {
194+
sid = "AllowRegionalReadActions"
195+
effect = "Allow"
196+
resources = ["*"]
197+
198+
actions = [
199+
"ec2:DescribeAvailabilityZones",
200+
"ec2:DescribeImages",
201+
"ec2:DescribeInstances",
202+
"ec2:DescribeInstanceTypeOfferings",
203+
"ec2:DescribeInstanceTypes",
204+
"ec2:DescribeLaunchTemplates",
205+
"ec2:DescribeSecurityGroups",
206+
"ec2:DescribeSpotPriceHistory",
207+
"ec2:DescribeSubnets",
208+
]
209+
210+
condition {
211+
test = "StringEquals"
212+
variable = "aws:RequestedRegion"
213+
values = [data.aws_region.current.name]
214+
}
215+
}
216+
217+
# ssm:GetParameter on parameters under /aws/service/ in the current region.
# NOTE(review): presumably used to look up AWS-published AMI ids - confirm.
statement {
218+
sid = "AllowSSMReadActions"
219+
effect = "Allow"
220+
resources = ["arn:${data.aws_partition.current.partition}:ssm:${data.aws_region.current.name}::parameter/aws/service/*"]
221+
actions = ["ssm:GetParameter"]
222+
}
223+
224+
# pricing:GetProducts on all resources.
statement {
225+
sid = "AllowPricingReadActions"
226+
effect = "Allow"
227+
resources = ["*"]
228+
actions = ["pricing:GetProducts"]
229+
}
230+
231+
# Read and delete messages, and read the attributes / URL, of the
# aws_sqs_queue.karpenter_interruption queue (defined outside the files
# shown in this view).
statement {
232+
sid = "AllowInterruptionQueueActions"
233+
effect = "Allow"
234+
resources = [aws_sqs_queue.karpenter_interruption.arn]
235+
236+
actions = [
237+
"sqs:DeleteMessage",
238+
"sqs:GetQueueAttributes",
239+
"sqs:GetQueueUrl",
240+
"sqs:ReceiveMessage",
241+
]
242+
}
243+
244+
# iam:PassRole for aws_iam_role.karpenter_node only, and only when the role
# is passed to ec2.amazonaws.com.
statement {
245+
sid = "AllowPassingInstanceRole"
246+
effect = "Allow"
247+
resources = [aws_iam_role.karpenter_node.arn]
248+
actions = ["iam:PassRole"]
249+
250+
condition {
251+
test = "StringEquals"
252+
variable = "iam:PassedToService"
253+
values = ["ec2.amazonaws.com"]
254+
}
255+
}
256+
257+
# eks:DescribeCluster on this cluster's ARN (var.cluster_config.arn).
statement {
258+
sid = "AllowAPIServerEndpointDiscovery"
259+
effect = "Allow"
260+
resources = [var.cluster_config.arn]
261+
actions = ["eks:DescribeCluster"]
262+
}
263+
}

modules/karpenter/data.tf

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Lookups for the current caller identity, partition and region. The partition
# and region values are interpolated into ARNs in controller_iam.tf.
# NOTE(review): aws_caller_identity is not referenced in the files visible
# here - confirm it is used elsewhere in the module.
data "aws_caller_identity" "current" {}
2+
data "aws_partition" "current" {}
3+
data "aws_region" "current" {}

0 commit comments

Comments
 (0)