# cik8s-cluster.tf
# Define the KMS key used to encrypt the EKS cluster secrets
resource "aws_kms_key" "cik8s" {
description = "EKS Secret Encryption Key for the cluster ${local.cik8s_cluster_name}"
enable_key_rotation = true
tags = {
associated_service = "eks/${local.cik8s_cluster_name}"
}
}
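# Hedged sketch (not part of the original file): an aws_kms_alias would make this key easier to
# locate in the AWS console and CLI. The alias name below is hypothetical.
# resource "aws_kms_alias" "cik8s" {
#   name          = "alias/eks-cik8s"
#   target_key_id = aws_kms_key.cik8s.key_id
# }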
# EKS Cluster definition
module "cik8s" {
source = "terraform-aws-modules/eks/aws"
version = "19.15.3"
cluster_name = local.cik8s_cluster_name
# Kubernetes version in format '<MAJOR>.<MINOR>', as per https://docs.aws.amazon.com/eks/latest/userguide/kubernetes-versions.html
cluster_version = "1.25"
# Start is inclusive, end is exclusive (!): from index 0 to index 2 (https://www.terraform.io/language/functions/slice)
# We're using the first 3 private_subnets defined in vpc.tf for this cluster
subnet_ids = slice(module.vpc.private_subnets, 0, 3)
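# For example (illustrative values only): slice(["subnet-a", "subnet-b", "subnet-c", "subnet-d"], 0, 3)
# yields ["subnet-a", "subnet-b", "subnet-c"].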
# Required to allow EKS service accounts to authenticate to the AWS API through OIDC (and assume IAM roles)
# Useful for the autoscaler, EKS addons and any other AWS API usage
enable_irsa = true
# Specifying the kubernetes provider to use for this cluster
# Note: this should be done AFTER initial cluster creation (bootstrap)
providers = {
kubernetes = kubernetes.cik8s
}
create_kms_key = false
cluster_encryption_config = {
provider_key_arn = aws_kms_key.cik8s.arn
resources = ["secrets"]
}
create_aws_auth_configmap = true
manage_aws_auth_configmap = true
cluster_endpoint_public_access = true
aws_auth_users = concat(local.configmap_iam_admin_accounts, [
# User used by infra.ci.jenkins.io to administer the chart deployments with github.com/jenkins-infra/kubernetes-management
{
userarn = data.aws_iam_user.cik8s_charter.arn,
username = data.aws_iam_user.cik8s_charter.user_name,
groups = ["system:masters"],
},
])
aws_auth_accounts = [
local.aws_account_id,
]
create_cluster_primary_security_group_tags = false
# Do not use interpolated values from `local` in either keys or values of the provided tags (or `cluster_tags`)
# to avoid an implicit dependency on a resource not available when parsing the module (the infamous error `Error: Invalid for_each argument`)
# Ref. same error as having a `depends_on` in https://github.com/terraform-aws-modules/terraform-aws-eks/issues/2337
tags = {
Environment = "jenkins-infra-${terraform.workspace}"
GithubRepo = "aws"
GithubOrg = "jenkins-infra"
associated_service = "eks/cik8s"
}
# VPC is defined in vpc.tf
vpc_id = module.vpc.vpc_id
## Manage EKS addons with module - https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/eks_addon
# See new versions with `aws eks describe-addon-versions --kubernetes-version <k8s-version> --addon-name <addon>`
cluster_addons = {
# https://github.com/coredns/coredns/releases
coredns = {
addon_version = "v1.9.3-eksbuild.5"
}
# Kube-proxy on an Amazon EKS cluster has the same compatibility and skew policy as Kubernetes
# See https://kubernetes.io/releases/version-skew-policy/#kube-proxy
kube-proxy = {
addon_version = "v1.25.11-eksbuild.1"
}
# https://github.com/aws/amazon-vpc-cni-k8s/releases
vpc-cni = {
addon_version = "v1.13.2-eksbuild.1"
}
# https://github.com/kubernetes-sigs/aws-ebs-csi-driver/blob/master/CHANGELOG.md
aws-ebs-csi-driver = {
addon_version = "v1.20.0-eksbuild.1"
service_account_role_arn = module.cik8s_irsa_ebs.iam_role_arn
}
}
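# Hedged usage example for the command referenced above, listing the available coredns versions
# for the current Kubernetes version (the --query expression matches the documented output shape):
#   aws eks describe-addon-versions --kubernetes-version 1.25 --addon-name coredns \
#     --query 'addons[].addonVersions[].addonVersion'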
eks_managed_node_groups = {
tiny_ondemand_linux = {
# This worker pool is expected to host the "technical" services such as pod autoscaler, etc.
name = "tiny-ondemand-linux"
instance_types = ["t3a.xlarge"]
capacity_type = "ON_DEMAND"
min_size = 1
max_size = 2 # Allow manual scaling when running operations or upgrades
desired_size = 1
bootstrap_extra_args = "--kubelet-extra-args '--node-labels=node.kubernetes.io/lifecycle=normal'"
suspended_processes = ["AZRebalance"]
tags = {
"k8s.io/cluster-autoscaler/enabled" = false # No autoscaling for these 2 machines
},
attach_cluster_primary_security_group = true
},
# This worker pool mixes spot instance types to ensure we always get the most available (i.e. the cheapest) spot capacity,
# as per https://aws.amazon.com/blogs/compute/cost-optimization-and-resilience-eks-with-spot-instances/
# Pricing table for 2023: https://docs.google.com/spreadsheets/d/1_C0I0jE-X0e0vDcdKOFIWcnwpOqWC8RQ4YOCgXNnplY/edit?usp=sharing
spot_linux_4xlarge = {
# 4xlarge: instances hosting up to 3 pods (each limited to 4 vCPUs / 8 GiB) with a 1 vCPU / 1 GiB margin
name = "spot-linux-4xlarge"
capacity_type = "SPOT"
# Less than 5% eviction rate, cost below $0.08 per pod per hour
instance_types = [
"c5.4xlarge",
"c5a.4xlarge"
]
block_device_mappings = {
xvda = {
device_name = "/dev/xvda"
ebs = {
volume_size = 90 # With 3 pods per machine, each can use ~30 GiB at the same time (`emptyDir`)
volume_type = "gp3"
iops = 3000 # Max included with gp3 without additional cost
throughput = 125 # Max included with gp3 without additional cost
encrypted = false
delete_on_termination = true
}
}
}
spot_instance_pools = 3 # Number of distinct instance types to draw from
min_size = 0
max_size = 50
desired_size = 0
kubelet_extra_args = "--node-labels=node.kubernetes.io/lifecycle=spot"
tags = {
"k8s.io/cluster-autoscaler/enabled" = true,
"k8s.io/cluster-autoscaler/${local.cik8s_cluster_name}" = "owned",
"ci.jenkins.io/agents-density" = 3,
}
attach_cluster_primary_security_group = true
labels = {
"ci.jenkins.io/agents-density" = 3,
}
},
# This worker pool mixes spot instance types to ensure we always get the most available (i.e. the cheapest) spot capacity,
# as per https://aws.amazon.com/blogs/compute/cost-optimization-and-resilience-eks-with-spot-instances/
# Pricing table for 2023: https://docs.google.com/spreadsheets/d/1_C0I0jE-X0e0vDcdKOFIWcnwpOqWC8RQ4YOCgXNnplY/edit?usp=sharing
spot_linux_4xlarge_bom = {
# 4xlarge: instances hosting up to 3 pods (each limited to 4 vCPUs / 8 GiB) with a 1 vCPU / 1 GiB margin
name = "spot-linux-4xlarge-bom"
capacity_type = "SPOT"
# Less than 5% eviction rate, cost below $0.08 per pod per hour
instance_types = [
"c5.4xlarge",
"c5a.4xlarge"
]
block_device_mappings = {
xvda = {
device_name = "/dev/xvda"
ebs = {
volume_size = 90 # With 3 pods per machine, each can use ~30 GiB at the same time (`emptyDir`)
volume_type = "gp3"
iops = 3000 # Max included with gp3 without additional cost
throughput = 125 # Max included with gp3 without additional cost
encrypted = false
delete_on_termination = true
}
}
}
spot_instance_pools = 3 # Number of distinct instance types to draw from
min_size = 0
max_size = 50
desired_size = 0
kubelet_extra_args = "--node-labels=node.kubernetes.io/lifecycle=spot"
tags = {
"k8s.io/cluster-autoscaler/enabled" = true,
"k8s.io/cluster-autoscaler/${local.cik8s_cluster_name}" = "owned",
"ci.jenkins.io/agents-density" = 3,
}
attach_cluster_primary_security_group = true
labels = {
"ci.jenkins.io/agents-density" = 3,
"ci.jenkins.io/bom" = true,
}
taints = [
{
key = "ci.jenkins.io/bom"
value = "true"
effect = "NO_SCHEDULE"
}
]
},
spot_linux_24xlarge_bom = {
# 24xlarge: instances hosting up to 23 pods (each limited to 4 vCPUs / 8 GiB) with a 1 vCPU / 1 GiB margin
name = "spot-linux-24xlarge"
capacity_type = "SPOT"
# Less than 5% eviction rate, cost below $0.05 per pod per hour
instance_types = [
"m5.24xlarge",
"c5.24xlarge",
]
block_device_mappings = {
xvda = {
device_name = "/dev/xvda"
ebs = {
volume_size = 575 # With 23 pods per machine, each can use ~25 GiB at the same time (`emptyDir`)
volume_type = "gp3"
iops = 3000 # Max included with gp3 without additional cost
throughput = 125 # Max included with gp3 without additional cost
encrypted = false
delete_on_termination = true
}
}
}
spot_instance_pools = 2 # Number of distinct instance types to draw from
min_size = 0
max_size = 15
desired_size = 0
kubelet_extra_args = "--node-labels=node.kubernetes.io/lifecycle=spot"
tags = {
"k8s.io/cluster-autoscaler/enabled" = true,
"k8s.io/cluster-autoscaler/${local.cik8s_cluster_name}" = "owned",
}
attach_cluster_primary_security_group = true
labels = {
"ci.jenkins.io/agents-density" = 23,
}
taints = [
{
key = "ci.jenkins.io/bom"
value = "true"
effect = "NO_SCHEDULE"
}
]
},
}
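# Hedged sketch (managed outside this file): for an agent pod to land on the *_bom pools above,
# its pod spec must both select the label and tolerate the taint, e.g. in YAML:
#   nodeSelector:
#     ci.jenkins.io/bom: "true"
#   tolerations:
#     - key: "ci.jenkins.io/bom"
#       operator: "Equal"
#       value: "true"
#       effect: "NoSchedule"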
# Allow egress from nodes (and pods...)
node_security_group_additional_rules = {
egress_jenkins_jnlp = {
description = "Allow egress to Jenkins TCP"
protocol = "TCP"
from_port = 50000
to_port = 50000
type = "egress"
cidr_blocks = ["0.0.0.0/0"]
ipv6_cidr_blocks = ["::/0"]
},
egress_http = {
description = "Allow egress to plain HTTP"
protocol = "TCP"
from_port = 80
to_port = 80
type = "egress"
cidr_blocks = ["0.0.0.0/0"]
ipv6_cidr_blocks = ["::/0"]
},
}
}
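# Hedged note: the cluster-autoscaler deployment (managed outside this file) can discover the
# autoscalable node groups above through the ASG tags they carry, e.g. with its flag:
#   --node-group-auto-discovery=asg:tag=k8s.io/cluster-autoscaler/enabled,k8s.io/cluster-autoscaler/<cluster-name>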
module "cik8s_iam_role_autoscaler" {
source = "terraform-aws-modules/iam/aws//modules/iam-assumable-role-with-oidc"
version = "5.28.0"
create_role = true
role_name = "${local.autoscaler_account_name}-cik8s"
provider_url = replace(module.cik8s.cluster_oidc_issuer_url, "https://", "")
role_policy_arns = [aws_iam_policy.cluster_autoscaler_cik8s.arn]
oidc_fully_qualified_subjects = ["system:serviceaccount:${local.autoscaler_account_namespace}:${local.autoscaler_account_name}"]
tags = {
associated_service = "eks/${module.cik8s.cluster_name}"
}
}
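# Hedged sketch (managed outside this file): for the role above to take effect, the autoscaler's
# Kubernetes ServiceAccount must carry the matching IRSA annotation, e.g. in YAML:
#   apiVersion: v1
#   kind: ServiceAccount
#   metadata:
#     name: <autoscaler_account_name>
#     namespace: <autoscaler_account_namespace>
#     annotations:
#       eks.amazonaws.com/role-arn: <module.cik8s_iam_role_autoscaler.iam_role_arn>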
module "cik8s_irsa_ebs" {
source = "terraform-aws-modules/iam/aws//modules/iam-assumable-role-with-oidc"
version = "5.28.0"
create_role = true
role_name = "${local.ebs_account_name}-cik8s"
provider_url = replace(module.cik8s.cluster_oidc_issuer_url, "https://", "")
role_policy_arns = [aws_iam_policy.ebs_csi.arn]
oidc_fully_qualified_subjects = ["system:serviceaccount:${local.ebs_account_namespace}:${local.ebs_account_name}"]
tags = {
associated_service = "eks/${module.cik8s.cluster_name}"
}
}
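# Hedged sketch (managed outside this file): with the aws-ebs-csi-driver addon and the IRSA role
# above, a gp3 StorageClass consuming the driver could look like (the name "ebs-gp3" is hypothetical):
#   apiVersion: storage.k8s.io/v1
#   kind: StorageClass
#   metadata:
#     name: ebs-gp3
#   provisioner: ebs.csi.aws.com
#   parameters:
#     type: gp3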
# Reference the existing user for administering the charts from github.com/jenkins-infra/charts
data "aws_iam_user" "cik8s_charter" {
user_name = "cik8s-charter"
}
data "aws_eks_cluster" "cik8s" {
name = local.cik8s_cluster_name
}
data "aws_eks_cluster_auth" "cik8s" {
name = local.cik8s_cluster_name
}
provider "kubernetes" {
alias = "cik8s"
host = data.aws_eks_cluster.cik8s.endpoint
cluster_ca_certificate = base64decode(data.aws_eks_cluster.cik8s.certificate_authority[0].data)
token = data.aws_eks_cluster_auth.cik8s.token
}
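# Hedged usage note: after the initial bootstrap, an operator with a suitable IAM identity can get
# local kubectl access with the AWS CLI (cluster name and region are placeholders here):
#   aws eks update-kubeconfig --name <cik8s-cluster-name> --region <region>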
## No restriction on the resources: either managed outside Terraform, or already scoped by conditions
#tfsec:ignore:aws-iam-no-policy-wildcards
data "aws_iam_policy_document" "cluster_autoscaler_cik8s" {
# Statements as per https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/aws/README.md#full-cluster-autoscaler-features-policy-recommended
statement {
sid = "unrestricted"
effect = "Allow"
actions = [
"autoscaling:DescribeAutoScalingGroups",
"autoscaling:DescribeAutoScalingInstances",
"autoscaling:DescribeLaunchConfigurations",
"autoscaling:DescribeScalingActivities",
"autoscaling:DescribeTags",
"ec2:DescribeInstanceTypes",
"ec2:DescribeLaunchTemplateVersions"
]
resources = ["*"]
}
statement {
sid = "restricted"
effect = "Allow"
actions = [
"autoscaling:SetDesiredCapacity",
"autoscaling:TerminateInstanceInAutoScalingGroup",
"ec2:DescribeImages",
"ec2:GetInstanceTypesFromInstanceRequirements",
"eks:DescribeNodegroup"
]
resources = ["*"]
}
}
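# Hedged sketch (not applied here): the upstream recommendation linked above further scopes the
# autoscaling actions of the "restricted" statement to tagged ASGs with a condition block, e.g.:
#   condition {
#     test     = "StringEquals"
#     variable = "aws:ResourceTag/k8s.io/cluster-autoscaler/enabled"
#     values   = ["true"]
#   }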
resource "aws_iam_policy" "cluster_autoscaler_cik8s" {
name_prefix = "cluster-autoscaler-cik8s"
description = "EKS cluster-autoscaler policy for cluster ${module.cik8s.cluster_name}"
policy = data.aws_iam_policy_document.cluster_autoscaler_cik8s.json
}