From ded618c956734f64ec26b118433bc0dc6361d2df Mon Sep 17 00:00:00 2001
From: Allan Carter
Date: Wed, 8 May 2024 18:07:12 -0500
Subject: [PATCH] Add config options for extra security groups and iam policies
 for head and compute nodes (#228)

Add the following config options:

* slurm/ParallelClusterConfig/ClusterConfig
* slurm/SlurmCtl/AdditionalSecurityGroups
* slurm/SlurmCtl/AdditionalIamPolicies
* slurm/SlurmCtl/Imds/Secured
* slurm/InstanceConfig/AdditionalSecurityGroups
* slurm/InstanceConfig/AdditionalIamPolicies

Added documentation for all config parameters.

Changed the StackName default from slurm-top to slurm-config.

Fix the slurm/ParallelClusterConfig/Dcv/Enabled option.
Change the option name from Enable to Enabled to match ParallelCluster.

Fix the setting of the ParallelCluster HeadNode/Dcv/AllowedIps config.
It was being set from the non-existent slurm/ParallelClusterConfig/AllowedIps
instead of slurm/ParallelClusterConfig/HeadNode/Dcv/AllowedIps.

Delete the following config option because it uses the legacy cluster:

* slurm/EdaSlurmClusterStackName

Resolves #225
---
 docs/config.md                | 860 ++++++++++++++++++++++++++++++++++
 mkdocs.yml                    |   1 +
 source/cdk/cdk_slurm_stack.py | 283 ++++++-----
 source/cdk/config_schema.py   |  35 +-
 4 files changed, 1022 insertions(+), 157 deletions(-)
 create mode 100644 docs/config.md

diff --git a/docs/config.md b/docs/config.md
new file mode 100644
index 00000000..372ae4ac
--- /dev/null
+++ b/docs/config.md
@@ -0,0 +1,860 @@
+# Configuration File Format
+
+This project creates a ParallelCluster configuration file that is documented in the [ParallelCluster User Guide](https://docs.aws.amazon.com/parallelcluster/latest/ug/cluster-configuration-file-v3.html).
+
+```
+termination_protection: bool
+StackName: str
+Region: str
+SshKeyPair: str
+VpcId: str
+CIDR: str
+SubnetId: str
+ErrorSnsTopicArn: str
+TimeZone: str
+RESEnvironmentName: str
+slurm:
+    ParallelClusterConfig:
+        Version: str
+        ClusterConfig: dict
+        Image:
+            Os: str
+            CustomAmi: str
+        Architecture: str
+        ComputeNodeAmi: str
+        DisableSimultaneousMultithreading: bool
+        EnableEfa: bool
+        Database:
+            DatabaseStackName: str
+            FQDN: str
+            Port: int
+            AdminUserName: str
+            AdminPasswordSecretArn: str
+            ClientSecurityGroup:
+                SecurityGroupName: SecurityGroupId
+        Dcv:
+            Enabled: bool
+            Port: int
+            AllowedIps: str
+        LoginNodes:
+            Pools:
+            - Name: str
+              Count: int
+              InstanceType: str
+              GracetimePeriod: int
+              Image:
+                  CustomAmi: str
+              Ssh:
+                  KeyName: str
+              Networking:
+                  SubnetIds:
+                      - str
+                  SecurityGroups:
+                      - str
+                  AdditionalSecurityGroups:
+                      - str
+              Iam:
+                  InstanceRole: str
+                  InstanceProfile: str
+                  AdditionalIamPolicies:
+                  - Policy: str
+    ClusterName: str
+    MungeKeySecret: str
+    SlurmCtl:
+        SlurmdPort: int
+        instance_type: str
+        volume_size: int
+        CloudWatchPeriod: int
+        PreemptMode: str
+        PreemptType: str
+        PreemptExemptTime: str
+        SlurmConfOverrides: str
+        SlurmrestdUid: int
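+        SlurmRestApiVersion: str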
+        AdditionalSecurityGroups:
+        - str
+        AdditionalIamPolicies:
+        - str
+        Imds:
+            Secured: bool
+    SubmitterSecurityGroupIds:
+        SecurityGroupName: SecurityGroupId
+    SubmitterInstanceTags:
+        TagName:
+        - TagValues
+    InstanceConfig:
+        UseSpot: bool
+        Exclude:
+            InstanceFamilies:
+            - str
+            InstanceTypes:
+            - str
+        Include:
+            MaxSizeOnly: bool
+            InstanceFamilies:
+            - str
+            InstanceTypes:
+            - str
+        NodeCounts:
+            DefaultMinCount: int
+            DefaultMaxCount: int
+            ComputeResourceCounts:
+                str: # ComputeResourceName
+                    MinCount: int
+                    MaxCount: int
+        AdditionalSecurityGroups:
+        - str
+        AdditionalIamPolicies:
+        - str
+        OnPremComputeNodes:
+            ConfigFile: str
+            CIDR: str
+            Partition: str
+    SlurmUid: int
+    storage:
+        ExtraMounts:
+        - dest: str
+          src: str
+          type: str
+          options: str
+          StorageType: str
+          FileSystemId: str
+          VolumeId: str
+        ExtraMountSecurityGroups:
+            FileSystemType:
+                SecurityGroupName: SecurityGroupId
+    Licenses:
+        LicenseName:
+            Count: int
+            Server: str
+            Port: str
+            ServerType: str
+            StatusScript: str
+```
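+
+For example, the following is a minimal sketch of a configuration. The key pair, VPC ID, subnet ID, and version are hypothetical placeholders that must be replaced with values for your environment; the exact set of required parameters is checked by the configuration schema.
+
+```
+StackName: slurm-config
+SshKeyPair: my-ec2-keypair
+VpcId: vpc-0123456789abcdef0
+SubnetId: subnet-0123456789abcdef0
+slurm:
+    ParallelClusterConfig:
+        Version: '3.9.1'
+    SlurmCtl: {}
+    InstanceConfig:
+        NodeCounts:
+            DefaultMaxCount: 10
+```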
+
+## Top Level Config
+
+### termination_protection
+
+Enable CloudFormation stack termination protection.
+
+default=True
+
+### StackName
+
+The name of the configuration stack that will configure ParallelCluster and deploy it.
+
+If you do not specify the ClusterName then it will default to a value based on the StackName.
+If StackName ends in `-config` then ClusterName will be the StackName with `-config` stripped off.
+Otherwise it will be the StackName with `-cl` (for cluster) appended.
+
+Optional so it can be specified on the command line.
+
+default='slurm-config'
+
+### Region
+
+AWS region where the cluster will be deployed.
+
+Optional so it can be specified on the command line.
+
+### SshKeyPair
+
+Default EC2 key pair that will be used for all cluster instances.
+
+Optional so it can be specified on the command line.
+
+### VpcId
+
+The ID of the VPC where the cluster will be deployed.
+
+Optional so it can be specified on the command line.
+
+### CIDR
+
+The CIDR of the VPC.
+This is used in security group rules.
+
+### SubnetId
+
+The ID of the VPC subnet where the cluster will be deployed.
+
+Optional. If not specified then the first private subnet is chosen.
+If no private subnets exist, then the first isolated subnet is chosen.
+If no isolated subnets exist, then the first public subnet is chosen.
+
+We recommend using a private or isolated subnet.
+
+### ErrorSnsTopicArn
+
+The ARN of an existing SNS topic.
+Errors will be published to the SNS topic.
+You can subscribe to the topic so that you are notified of things like script or lambda errors.
+
+Optional, but highly recommended.
+
+### TimeZone
+
+The time zone to use for all EC2 instances in the cluster.
+
+default='US/Central'
+
+### RESEnvironmentName
+
+If you are deploying the cluster to use from Research and Engineering Studio (RES) virtual desktops, then you
+can specify the environment name so that the virtual desktops automatically get configured to use the cluster.
+
+The security group of the desktops will be updated with rules that allow them to talk to the cluster and the
+cluster will be configured on the desktop.
+
+The Slurm binaries will be compiled for the OS of the desktops and an environment modulefile will be created
+so that users just need to load the cluster modulefile to use the cluster.
+
+## slurm
+
+Slurm configuration parameters.
+
+### ParallelClusterConfig
+
+ParallelCluster-specific configuration parameters.
+
+#### Version
+
+The ParallelCluster version.
+
+This is required and cannot be changed after the cluster is created.
+
+Updating to a new version of ParallelCluster requires either deleting the current cluster or creating a new cluster.
+
+#### ClusterConfig
+
+type: dict
+
+Additional ParallelCluster configuration settings that will be directly added
+to the configuration without checking.
+
+This will be used to create the initial ParallelCluster configuration, and other settings in this configuration file will override values in the dict.
+
+This exists to enable further customization of ParallelCluster beyond what this configuration supports.
+
+#### Image
+
+The OS and AMI to use for the head node and compute nodes.
+
+##### Os
+
+See the [ParallelCluster docs](https://docs.aws.amazon.com/parallelcluster/latest/ug/Image-v3.html#yaml-Image-Os) for the supported OS distributions and versions.
+
+##### CustomAmi
+
+See the [ParallelCluster docs](https://docs.aws.amazon.com/parallelcluster/latest/ug/Image-v3.html#yaml-Image-CustomAmi) for the custom AMI documentation.
+
+**NOTE**: A CustomAmi must be provided for Rocky8.
+All other distributions have a default AMI that is provided by ParallelCluster.
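+
+For example, a sketch of selecting Rocky Linux 8 with a custom AMI; the AMI ID is a hypothetical placeholder:
+
+```
+slurm:
+    ParallelClusterConfig:
+        Image:
+            Os: rocky8
+            CustomAmi: ami-0123456789abcdef0
+```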
+
+#### Architecture
+
+The CPU architecture to use for the cluster.
+
+ParallelCluster doesn't support heterogeneous clusters.
+All of the instances must have the same CPU architecture and the same OS.
+
+The cluster, however, can be accessed from login nodes of any architecture and OS.
+
+Valid Values:
+
+* arm64
+* x86_64
+
+default: x86_64
+
+#### ComputeNodeAmi
+
+AMI to use for compute nodes.
+
+All compute nodes will use the same AMI.
+
+The default AMI is selected by the [Image](#image) parameters.
+
+#### DisableSimultaneousMultithreading
+
+type: bool
+
+default=True
+
+Disable SMT on the compute nodes.
+
+If true, multithreading on the compute nodes is disabled.
+
+Not all instance types can disable multithreading. For a list of instance types that support disabling multithreading, see CPU cores and threads for each CPU core per instance type in the Amazon EC2 User Guide for Linux Instances.
+
+Update policy: The compute fleet must be stopped for this setting to be changed for an update.
+
+[ParallelCluster documentation](https://docs.aws.amazon.com/parallelcluster/latest/ug/Scheduling-v3.html#yaml-Scheduling-SlurmQueues-ComputeResources-DisableSimultaneousMultithreading)
+
+#### EnableEfa
+
+type: bool
+
+default: False
+
+We recommend not using EFA unless necessary, to avoid insufficient capacity errors when starting new instances in the group or when there are multiple instance types in the group.
+
+See [https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/placement-groups.html#placement-groups-cluster](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/placement-groups.html#placement-groups-cluster)
+
+#### Database
+
+Optional
+
+Configure the Slurm database to use with the cluster.
+
+This is created independently of the cluster so that the same database can be used with multiple clusters.
+
+The easiest way to do this is to use the [CloudFormation template provided by ParallelCluster](https://docs.aws.amazon.com/parallelcluster/latest/ug/tutorials_07_slurm-accounting-v3.html#slurm-accounting-db-stack-v3) and then just pass
+the name of the stack in [DatabaseStackName](#databasestackname).
+All of the other parameters will be pulled from the stack.
+
+See the [ParallelCluster documentation](https://docs.aws.amazon.com/parallelcluster/latest/ug/Scheduling-v3.html#Scheduling-v3-SlurmSettings-Database).
+
+##### DatabaseStackName
+
+Name of the ParallelCluster CloudFormation stack that created the database.
+
+The following parameters will be set using the outputs of the stack:
+
+* FQDN
+* Port
+* AdminUserName
+* AdminPasswordSecretArn
+* ClientSecurityGroup
+
+##### FQDN
+
+Used with the Port to set the [Uri](https://docs.aws.amazon.com/parallelcluster/latest/ug/Scheduling-v3.html#yaml-Scheduling-SlurmSettings-Database-Uri) of the database.
+
+##### Port
+
+type: int
+
+Database's port.
+
+##### AdminUserName
+
+type: str
+
+The identity that Slurm uses to connect to the database, write accounting logs, and perform queries. The user must have both read and write permissions on the database.
+
+Sets the [UserName](https://docs.aws.amazon.com/parallelcluster/latest/ug/Scheduling-v3.html#yaml-Scheduling-SlurmSettings-Database-UserName) parameter in ParallelCluster.
+
+##### AdminPasswordSecretArn
+
+type: str
+
+The Amazon Resource Name (ARN) of the AWS Secrets Manager secret that contains the AdminUserName plaintext password.
+This password is used together with AdminUserName by Slurm accounting to authenticate on the database server.
+
+Sets the [PasswordSecretArn](https://docs.aws.amazon.com/parallelcluster/latest/ug/Scheduling-v3.html#yaml-Scheduling-SlurmSettings-Database-PasswordSecretArn) parameter in ParallelCluster.
+
+##### ClientSecurityGroup
+
+Security group that has permissions to connect to the database.
+
+Must be attached to the head node that is running slurmdbd so that the connection to the database port is allowed.
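+
+For example, a sketch that pulls all of the database parameters from the outputs of an existing database stack; the stack name is a hypothetical placeholder:
+
+```
+slurm:
+    ParallelClusterConfig:
+        Database:
+            DatabaseStackName: pcluster-slurm-db
+```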
+
+### ClusterName
+
+Name of the ParallelCluster cluster.
+
+Default: If StackName ends with "-config" then ClusterName is StackName with "-config" stripped off.
+Otherwise, ClusterName is StackName with "-cl" appended.
+
+### MungeKeySecret
+
+AWS secret with a base64 encoded munge key to use for the cluster.
+For an existing secret, this can be the secret name or the ARN.
+If the secret doesn't exist, one will be created, but it won't be part of the CloudFormation stack so that it won't be deleted when the stack is deleted.
+Required if your submitters need to use more than 1 cluster.
+
+### SlurmCtl
+
+Configure the Slurm head node or controller.
+
+Required, but can be an empty dict to accept all of the defaults.
+
+#### SlurmdPort
+
+Port used for the slurmd daemon on the compute nodes.
+
+default=6818
+
+type: int
+
+#### instance_type
+
+Instance type of the head node.
+
+Must match the architecture of the cluster.
+
+#### volume_size
+
+The size of the EBS root volume on the head node in GB.
+
+default=200
+
+type: int
+
+#### CloudWatchPeriod
+
+The frequency of CloudWatch metrics in seconds.
+
+default=5
+
+type: int
+
+#### PreemptMode
+
+Set the job preemption policy for the cluster.
+
+Jobs can be set to be preemptible when they are submitted.
+This allows higher priority jobs to preempt a running job when resources are constrained.
+This policy sets what happens to the preempted jobs.
+
+[Slurm documentation](https://slurm.schedmd.com/slurm.conf.html#OPT_PreemptMode)
+
+Valid values:
+
+* 'OFF'
+* 'CANCEL'
+* 'GANG'
+* 'REQUEUE'
+* 'SUSPEND'
+
+default='REQUEUE'
+
+#### PreemptType
+
+[Slurm documentation](https://slurm.schedmd.com/slurm.conf.html#OPT_PreemptType)
+
+Valid values:
+
+* 'preempt/none'
+* 'preempt/partition_prio'
+* 'preempt/qos'
+
+default='preempt/partition_prio'
+
+#### PreemptExemptTime
+
+[Slurm documentation](https://slurm.schedmd.com/slurm.conf.html#OPT_PreemptExemptTime)
+
+Global option for the minimum run time for all jobs before they can be considered for preemption.
+
+A time of -1 disables the option, equivalent to 0. Acceptable time formats include "minutes", "minutes:seconds", "hours:minutes:seconds", "days-hours", "days-hours:minutes", and "days-hours:minutes:seconds".
+
+default='0'
+
+type: str
+
+#### SlurmConfOverrides
+
+File that will be included at the end of slurm.conf to override configuration parameters.
+
+This allows you to customize the Slurm configuration arbitrarily.
+
+This should be used with caution since it can result in errors that make the cluster non-functional.
+
+type: str
+
+#### SlurmrestdUid
+
+User ID for the slurmrestd daemon.
+
+type: int
+
+default=901
+
+#### SlurmRestApiVersion
+
+The REST API version.
+
+This is automatically set based on the Slurm version being used by the ParallelCluster version.
+
+type: str
+
+default: '0.0.39'
+
+#### Head Node AdditionalSecurityGroups
+
+Additional security groups that will be added to the head node instance.
+
+#### Head Node AdditionalIamPolicies
+
+List of Amazon Resource Names (ARNs) of IAM policies for Amazon EC2 that will be added to the head node instance.
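+
+For example, a sketch that adds an extra security group and IAM policy to the head node and leaves IMDS secured; the security group ID and policy ARN are hypothetical placeholders:
+
+```
+slurm:
+    SlurmCtl:
+        AdditionalSecurityGroups:
+        - sg-0123456789abcdef0
+        AdditionalIamPolicies:
+        - arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess
+        Imds:
+            Secured: true
+```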
+
+### SubmitterSecurityGroupIds
+
+External security groups that should be able to use the cluster.
+
+Rules will be added to allow them to interact with Slurm.
+
+### SubmitterInstanceTags
+
+Tags of instances that can be configured to submit to the cluster.
+
+When the cluster is deleted, the tag is used to unmount the slurm filesystem from the instances using SSM.
+
+### InstanceConfig
+
+Configure the instances used by the cluster.
+
+A partition will be created for each combination of Base OS, Architecture, and Spot.
+
+#### UseSpot
+
+Configure spot instances.
+
+type: bool
+
+default: True
+
+#### Exclude
+
+Instance families and types to exclude.
+
+Exclude patterns are processed first and take precedence over any includes.
+
+Instance families and types are regular expressions with implicit '^' and '$' at the beginning and end.
+
+##### Exclude InstanceFamilies
+
+Regular expressions with implicit '^' and '$' at the beginning and end.
+
+An empty list is the same as '.*'.
+
+Default:
+
+```
+default_excluded_instance_families = [
+    'a1',   # Graviton 1
+    'c4',   # Replaced by c5
+    'd2',   # SSD optimized
+    'g3',   # Replaced by g4
+    'g3s',  # Replaced by g4
+    'h1',   # SSD optimized
+    'i3',   # SSD optimized
+    'i3en', # SSD optimized
+    'm4',   # Replaced by m5
+    'p2',   # Replaced by p3
+    'p3',
+    'p3dn',
+    'r4',   # Replaced by r5
+    't2',   # Replaced by t3
+    'x1',
+    'x1e',
+]
+```
+
+##### Exclude InstanceTypes
+
+Regular expressions with implicit '^' and '$' at the beginning and end.
+
+An empty list is the same as '.*'.
+
+Default:
+
+```
+default_excluded_instance_types = [
+    '.+\.(micro|nano)', # Not enough memory
+    '.*\.metal.*'
+]
+```
+
+#### Include
+
+Instance families and types to include.
+
+Exclude patterns are processed first and take precedence over any includes.
+
+Instance families and types are regular expressions with implicit '^' and '$' at the beginning and end.
+
+##### MaxSizeOnly
+
+type: bool
+
+default: False
+
+If MaxSizeOnly is True then only the largest instance type in a family will be included unless specific instance types are included.
+
+##### Include InstanceFamilies
+
+Regular expressions with implicit '^' and '$' at the beginning and end.
+
+An empty list is the same as '.*'.
+
+Default:
+
+```
+default_eda_instance_families = [
+    'c7a',      # AMD EPYC 9R14 Processor 3.7 GHz
+
+    'c7g',      # AWS Graviton3 Processor 2.6 GHz
+    # 'c7gd',   # AWS Graviton3 Processor 2.6 GHz
+    # 'c7gn',   # AWS Graviton3 Processor 2.6 GHz
+
+    # 'c7i',    # Intel Xeon Scalable (Sapphire Rapids) 3.2 GHz
+
+    #'f1',      # Intel Xeon E5-2686 v4 (Broadwell) 2.3 GHz
+
+    'm5zn',     # Intel Xeon Platinum 8252 4.5 GHz
+
+    'm7a',      # AMD EPYC 9R14 Processor 3.7 GHz
+
+    # 'm7i',    # Intel Xeon Scalable (Sapphire Rapids) 3.2 GHz
+
+    'm7g',      # AWS Graviton3 Processor 2.6 GHz
+    # 'm7gd',   # AWS Graviton3 Processor 2.6 GHz
+
+    'r7a',      # AMD EPYC 9R14 Processor 3.7 GHz
+
+    'r7g',      # AWS Graviton3 Processor 2.6 GHz
+    # 'r7gd',   # AWS Graviton3 Processor 2.6 GHz
+
+    # 'r7i',    # Intel Xeon Scalable (Sapphire Rapids) 3.2 GHz
+
+    'r7iz',     # Intel Xeon Scalable (Sapphire Rapids) 3.2 GHz
+
+    'x2gd',     # AWS Graviton2 Processor 2.5 GHz 1TB
+
+    'x2idn',    # Intel Xeon Scalable (Icelake) 3.5 GHz 2 TB
+
+    'x2iedn',   # Intel Xeon Scalable (Icelake) 3.5 GHz 4 TB
+
+    'x2iezn',   # Intel Xeon Platinum 8252 4.5 GHz 1.5 TB
+
+    #'u-6tb1',  # Intel Xeon Scalable (Skylake) 6 TB
+    #'u-9tb1',  # Intel Xeon Scalable (Skylake) 9 TB
+    #'u-12tb1', # Intel Xeon Scalable (Skylake) 12 TB
+]
+```
+
+##### Include InstanceTypes
+
+Regular expressions with implicit '^' and '$' at the beginning and end.
+
+An empty list is the same as '.*'.
+
+Default:
+
+```
+default_eda_instance_types = [
+    #'c5\.(l|x|2|4|9|18).*',  # Intel Xeon Platinum 8124M 3.4 GHz
+    #'c5\.(12|24).*',         # Intel Xeon Platinum 8275L 3.6 GHz
+    #'c5d\.(l|x|2|4|9|18).*', # Intel Xeon Platinum 8124M 3.4 GHz
+    #'c5d\.(12|24).*',        # Intel Xeon Platinum 8275L 3.6 GHz
+]
+```
+
+#### NodeCounts
+
+Configure the number of compute nodes of each instance type.
+
+##### DefaultMinCount
+
+type: int
+
+default: 0
+
+Minimum number of compute nodes to keep running in a compute resource.
+If the number is greater than zero then static nodes will be created.
+
+##### DefaultMaxCount
+
+type: int
+
+The maximum number of compute nodes to create in a compute resource.
+
+##### ComputeResourceCounts
+
+Define compute node counts per compute resource.
+
+These counts will override the defaults set by [DefaultMinCount](#defaultmincount) and [DefaultMaxCount](#defaultmaxcount).
+
+###### ComputeResourceName
+
+Name of the ParallelCluster compute resource. Can be found using `sinfo`.
+
+###### Compute Resource MinCount
+
+type: int
+
+default: 0
+
+###### Compute Resource MaxCount
+
+type: int
+
+#### Compute Node AdditionalSecurityGroups
+
+Additional security groups that will be added to the compute node instances.
+
+#### Compute Node AdditionalIamPolicies
+
+List of Amazon Resource Names (ARNs) of IAM policies for Amazon EC2 that will be added to the compute node instances.
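+
+For example, a sketch of an InstanceConfig that includes a single instance family, overrides the node counts for one compute resource, and adds an extra security group and IAM policy to the compute nodes; the compute resource name, security group ID, and policy ARN are hypothetical placeholders:
+
+```
+slurm:
+    InstanceConfig:
+        UseSpot: true
+        Include:
+            InstanceFamilies:
+            - 'm7a'
+        NodeCounts:
+            DefaultMinCount: 0
+            DefaultMaxCount: 10
+            ComputeResourceCounts:
+                m7a-l:
+                    MinCount: 1
+                    MaxCount: 20
+        AdditionalSecurityGroups:
+        - sg-0123456789abcdef0
+        AdditionalIamPolicies:
+        - arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess
+```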
+
+#### OnPremComputeNodes
+
+Define on-premises compute nodes that will be managed by the ParallelCluster head node.
+
+The compute nodes must be accessible from the head node over the network and any firewalls must allow all of the Slurm ports between the head node and compute nodes.
+
+ParallelCluster will be configured to allow the necessary network traffic and
+the on-premises firewall can be configured to match the ParallelCluster security groups.
+
+##### ConfigFile
+
+Configuration file with the on-premises compute nodes defined in Slurm NodeName format as described in the [Slurm slurm.conf documentation](https://slurm.schedmd.com/slurm.conf.html#OPT_NodeName).
+
+The file will be included in the ParallelCluster slurm.conf so it can technically include any Slurm configuration updates, including custom partition definitions.
+
+**NOTE**: The syntax of the file isn't checked and syntax errors can result in the slurmctld daemon failing on the head node.
+
+##### On-Premises CIDR
+
+The CIDR that contains the on-premises compute nodes.
+
+This is to allow egress from the head node to the on-premises nodes.
+
+##### Partition
+
+A partition that will contain all of the on-premises nodes.
+
+### SlurmUid
+
+type: int
+
+default: 900
+
+The user id of the slurm user.
+
+### storage
+
+#### ExtraMounts
+
+Additional mounts for compute nodes.
+
+This can be used so that the compute nodes have the same file structure as the remote desktops.
+
+This is used to configure [ParallelCluster SharedStorage](https://docs.aws.amazon.com/parallelcluster/latest/ug/SharedStorage-v3.html).
+
+##### dest
+
+The directory where the file system will be mounted.
+
+This sets the [MountDir](https://docs.aws.amazon.com/parallelcluster/latest/ug/SharedStorage-v3.html#yaml-SharedStorage-MountDir).
+
+##### src
+
+The source path on the file system export that will be mounted.
+
+##### type
+
+The type of mount. For example, nfs3.
+
+##### options
+
+Mount options.
+
+##### StorageType
+
+The type of file system to mount.
+
+Valid values:
+
+* Efs
+* FsxLustre
+* FsxOntap
+* FsxOpenZfs
+
+##### FileSystemId
+
+Specifies the ID of an existing [FSx for Lustre](https://docs.aws.amazon.com/parallelcluster/latest/ug/SharedStorage-v3.html#yaml-SharedStorage-FsxLustreSettings-FileSystemId) or [EFS](https://docs.aws.amazon.com/parallelcluster/latest/ug/SharedStorage-v3.html#yaml-SharedStorage-EfsSettings-FileSystemId) file system.
+
+##### VolumeId
+
+Specifies the volume ID of an existing [FSx for ONTAP](https://docs.aws.amazon.com/parallelcluster/latest/ug/SharedStorage-v3.html#yaml-SharedStorage-FsxOntapSettings-VolumeId) or [FSx for OpenZFS](https://docs.aws.amazon.com/parallelcluster/latest/ug/SharedStorage-v3.html#yaml-SharedStorage-FsxOpenZfsSettings-VolumeId) file system.
+
+#### ExtraMountSecurityGroups
+
+The security groups used by the file systems so that the head and compute nodes
+can be configured to connect to them.
+
+For example:
+
+```
+storage:
+    ExtraMounts:
+    - dest: "/tools"
+      StorageType: FsxOpenZfs
+      VolumeId: 'fsvol-abcd1234'
+      src: 'fs-efgh5678.fsx.us-east-1.amazonaws.com:/fsx/'
+      type: nfs4
+      options: 'nfsvers=4.1'
+    ExtraMountSecurityGroups:
+        zfs:
+            FsxSG: sg-12345678
+```
+
+##### FileSystemType
+
+Type of file system so that the appropriate ports can be opened.
+
+Valid values:
+
+* nfs
+* zfs
+* lustre
+
+### Licenses
+
+Configure license counts for the scheduler.
+
+If the Slurm database is configured then it will be updated with the license counts.
+Otherwise, the license counts will be added to slurm.conf.
+
+#### LicenseName
+
+The name of the license, for example, `VCSCompiler_Net` or `VCSMXRunTime_Net`.
+This is the license name that users specify when submitting a job.
+It doesn't have to match the license name reported by the license server,
+although that probably makes the most sense.
+
+##### Count
+
+The number of licenses available to Slurm to use to schedule jobs.
+Once all of the licenses are used by running jobs, any pending jobs will remain pending until a license becomes available.
+
+##### Server
+
+The license server hosting the licenses.
+
+Not currently used.
+
+##### Port
+
+The port on the license server used to request licenses.
+
+Not currently used.
+
+##### ServerType
+
+The type of license server, such as FlexLM.
+
+Not currently used.
+
+##### StatusScript
+
+A script that queries the license server and dynamically updates the Slurm database with the actual total number of licenses and the number used.
+
+Not currently implemented.
+
diff --git a/mkdocs.yml b/mkdocs.yml
index 9bce6670..85cd71ea 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -5,6 +5,7 @@ nav:
   - 'index.md'
   - 'deployment-prerequisites.md'
   - 'deploy-parallel-cluster.md'
+  - 'config.md'
   - 'res_integration.md'
   - 'soca_integration.md'
   - 'custom-amis.md'
diff --git a/source/cdk/cdk_slurm_stack.py b/source/cdk/cdk_slurm_stack.py
index cb201aa3..03c35b0a 100644
--- a/source/cdk/cdk_slurm_stack.py
+++ b/source/cdk/cdk_slurm_stack.py
@@ -299,15 +299,11 @@ def check_config(self):
                 config_errors += 1

         if 'Database' in self.config['slurm']['ParallelClusterConfig']:
-            if 'DatabaseStackName' in self.config['slurm']['ParallelClusterConfig']['Database'] and 'EdaSlurmClusterStackName' in self.config['slurm']['ParallelClusterConfig']['Database']:
-                logger.error(f"Cannot specify both slurm/ParallelClusterConfig/Database/DatabaseStackName and slurm/ParallelClusterConfig/Database/EdaSlurmClusterStackName")
-                config_errors += 1
-
             required_keys = ['ClientSecurityGroup', 'FQDN', 'Port', 'AdminUserName', 'AdminPasswordSecretArn']
-            if 'DatabaseStackName' in self.config['slurm']['ParallelClusterConfig']['Database'] or 'EdaSlurmClusterStackName' in self.config['slurm']['ParallelClusterConfig']['Database']:
+            if 'DatabaseStackName' in self.config['slurm']['ParallelClusterConfig']['Database']:
                 invalid_keys = []
                 for database_key in self.config['slurm']['ParallelClusterConfig']['Database']:
-                    if database_key in ['DatabaseStackName', 'EdaSlurmClusterStackName']:
+                    if database_key in ['DatabaseStackName']:
                         continue
                     if database_key in required_keys:
                         logger.error(f"Cannot specify slurm/ParallelClusterConfig/Database/{database_key} and slurm/ParallelClusterConfig/Database/[Database,EdaSlurmCluster]StackName")
@@ -366,31 +362,6 @@ def check_config(self):
                 if database_key not in self.config['slurm']['ParallelClusterConfig']['Database']:
                     logger.error(f"{output} output not found in self.config['slurm']['ParallelClusterConfig']['Database']['DatabaseStackName'] stack to set slurm/ParallelClusterConfig/Database/{database_key}")
-            elif 'EdaSlurmClusterStackName' in self.config['slurm']['ParallelClusterConfig']['Database']:
-                cfn_client = boto3.client('cloudformation', region_name=self.config['region'])
-                stack_outputs = cfn_client.describe_stacks(StackName=self.config['slurm']['ParallelClusterConfig']['Database']['EdaSlurmClusterStackName'])['Stacks'][0]['Outputs']
-                output_to_key_map = {
-                    'DatabaseHost': 'FQDN',
-                    'DatabasePort': 'Port',
-                    'DatabaseAdminUser': 'AdminUserName',
-                    'DatabaseAdminPasswordSecretArn': 'AdminPasswordSecretArn',
-                    'SlurmDbdSecurityGroup': 'ClientSecurityGroup'
-                }
-                for output in stack_outputs:
-                    if output['OutputKey'] in output_to_key_map:
-                        database_key = output_to_key_map[output['OutputKey']]
-                        if database_key == 'Port':
-                            value = int(output['OutputValue'])
-                        else:
-                            value = output['OutputValue']
-                        if database_key == 'ClientSecurityGroup':
-                            self.config['slurm']['ParallelClusterConfig']['Database'][database_key] = {f"{self.config['slurm']['ParallelClusterConfig']['Database']['EdaSlurmClusterStackName']}-SlurmDbdSG": value}
-                        else:
-                            self.config['slurm']['ParallelClusterConfig']['Database'][database_key] = value
-                for output, database_key in output_to_key_map.items():
-                    if database_key not in self.config['slurm']['ParallelClusterConfig']['Database']:
-                        logger.error(f"{output} output not found in self.config['slurm']['ParallelClusterConfig']['Database']['EdaSlurmClusterStackName'] stack to set slurm/ParallelClusterConfig/Database/{database_key}")
-
             else:
                 for database_key in required_keys:
                     if database_key not in self.config['slurm']['ParallelClusterConfig']['Database']:
@@ -2337,130 +2308,136 @@ def create_parallel_cluster_config(self):
             logger.error(f"Config slurm/ParallelClusterConfig/ComputeNodeAmi({compute_node_ami}) architecture=={ami_architecture}. Must be the same as slurm/ParallelClusterConfig/Architecture({cluster_architecture})")
             exit(1)

-        self.parallel_cluster_config = {
-            'HeadNode': {
-                'Dcv': {
-                    'Enabled': self.config['slurm']['ParallelClusterConfig']['Dcv']['Enable'],
-                    'Port': self.config['slurm']['ParallelClusterConfig']['Dcv']['Port']
-                },
-                'Iam': {
-                    'AdditionalIamPolicies': [
-                        {'Policy': 'arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore'},
-                        {'Policy': '{{ParallelClusterAssetReadPolicyArn}}'},
-                        {'Policy': '{{ParallelClusterSnsPublishPolicyArn}}'},
-                        {'Policy': '{{ParallelClusterJwtWritePolicyArn}}'},
-                        {'Policy': '{{ParallelClusterMungeKeyWritePolicyArn}}'},
-                    ],
-                },
-                'Imds': {
-                    'Secured': True
-                },
-                'InstanceType': self.config['slurm']['SlurmCtl']['instance_type'],
-                'Ssh': {
-                    'KeyName':self.config['SshKeyPair']
-                },
-                'Networking': {
-                    'SubnetId': self.config['SubnetId'],
-                    'AdditionalSecurityGroups': [
-                        '{{SlurmCtlSecurityGroupId}}'
-                    ]
-                },
-                'CustomActions': {
-                    'OnNodeStart': {
-                        'Sequence': [
-                            {
-                                'Script': self.custom_action_s3_urls['config/bin/on_head_node_start.sh'],
-                                'Args': []
-                            }
-                        ]
-                    },
-                    'OnNodeConfigured': {
-                        'Sequence': [
-                            {
-                                'Script': self.custom_action_s3_urls['config/bin/on_head_node_configured.sh'],
-                                'Args': []
-                            }
-                        ]
-                    },
-                    'OnNodeUpdated': {
-                        'Sequence': [
-                            {
-                                'Script': self.custom_action_s3_urls['config/bin/on_head_node_updated.sh'],
-                                'Args': []
-                            }
-                        ]
-                    }
-                },
-            },
-            'Image': {
-                'Os': self.config['slurm']['ParallelClusterConfig']['Image']['Os']
-            },
-            'Imds': {
-                'ImdsSupport': 'v2.0'
-            },
-            'Region': self.cluster_region,
-            'Scheduling': {
-                'Scheduler': 'slurm',
-                'SlurmQueues': [],
-                'SlurmSettings': {
-                    'EnableMemoryBasedScheduling': True,
-                    'CustomSlurmSettings': [
-                        {'AuthAltTypes': 'auth/jwt'},
-                        {'AuthAltParameters': 'jwt_key=/opt/slurm/var/spool/jwt_hs256.key'},
-                        {'FederationParameters': 'fed_display'},
-                        # JobRequeue must be set to 1 to enable preemption to requeue jobs.
-                        {'JobRequeue': 1},
-                        # {'LaunchParameters': 'enable_nss_slurm'},
-                        {'PreemptExemptTime': self.config['slurm']['SlurmCtl']['PreemptExemptTime']},
-                        {'PreemptMode': self.config['slurm']['SlurmCtl']['PreemptMode']},
-                        {'PreemptParameters': ','.join([
-                            'reclaim_licenses',
-                            'send_user_signal',
-                            'strict_order',
-                            'youngest_first',
-                        ])},
-                        {'PreemptType': self.config['slurm']['SlurmCtl']['PreemptType']},
-                        {'PrologFlags': 'X11'},
-                        {'SchedulerParameters': ','.join([
-                            'batch_sched_delay=10',
-                            'bf_continue',
-                            'bf_interval=30',
-                            'bf_licenses',
-                            'bf_max_job_test=500',
-                            'bf_max_job_user=0',
-                            'bf_yield_interval=1000000',
-                            'default_queue_depth=10000',
-                            'max_rpc_cnt=100',
-                            'nohold_on_prolog_fail',
-                            'sched_min_internal=2000000',
-                        ])},
-                        {'ScronParameters': 'enable'},
-                    ],
-                },
-            },
-            'Tags': [
-                {
-                    'Key': 'parallelcluster-ui',
-                    'Value': 'true'
-                }
-            ]
-        }
+        self.parallel_cluster_config = self.config['slurm']['ParallelClusterConfig'].get('ClusterConfig', {})
+
+        self.parallel_cluster_config['HeadNode'] = self.parallel_cluster_config.get('HeadNode', {})
+
+        self.parallel_cluster_config['HeadNode']['Dcv'] = self.parallel_cluster_config['HeadNode'].get('Dcv', {})
+        self.parallel_cluster_config['HeadNode']['Dcv']['Enabled'] = self.config['slurm']['ParallelClusterConfig']['Dcv']['Enabled']
+        self.parallel_cluster_config['HeadNode']['Dcv']['Port'] = self.config['slurm']['ParallelClusterConfig']['Dcv']['Port']
         if 'AllowedIps' in self.config['slurm']['ParallelClusterConfig']['Dcv']:
-            self.parallel_cluster_config['HeadNode']['Dcv']['AllowedIps'] = self.config['slurm']['ParallelClusterConfig']['AllowedIps']
+            self.parallel_cluster_config['HeadNode']['Dcv']['AllowedIps'] = self.config['slurm']['ParallelClusterConfig']['Dcv']['AllowedIps']

-        if self.munge_key_secret_arn:
-            self.parallel_cluster_config['Scheduling']['SlurmSettings']['MungeKeySecretArn'] = self.munge_key_secret_arn
+        self.parallel_cluster_config['HeadNode']['Iam'] = self.parallel_cluster_config['HeadNode'].get('Iam', {})
+        self.parallel_cluster_config['HeadNode']['Iam']['AdditionalIamPolicies'] = self.parallel_cluster_config['HeadNode']['Iam'].get('AdditionalIamPolicies', [])
+        self.parallel_cluster_config['HeadNode']['Iam']['AdditionalIamPolicies'].append({'Policy': 'arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore'})
+        self.parallel_cluster_config['HeadNode']['Iam']['AdditionalIamPolicies'].append({'Policy': '{{ParallelClusterAssetReadPolicyArn}}'})
+        self.parallel_cluster_config['HeadNode']['Iam']['AdditionalIamPolicies'].append({'Policy': '{{ParallelClusterSnsPublishPolicyArn}}'})
+        self.parallel_cluster_config['HeadNode']['Iam']['AdditionalIamPolicies'].append({'Policy': '{{ParallelClusterJwtWritePolicyArn}}'})
+        self.parallel_cluster_config['HeadNode']['Iam']['AdditionalIamPolicies'].append({'Policy': '{{ParallelClusterMungeKeyWritePolicyArn}}'})
+        if 'AdditionalIamPolicies' in self.config['slurm']['SlurmCtl']:
+            for iam_policy_arn in self.config['slurm']['SlurmCtl']['AdditionalIamPolicies']:
+                self.parallel_cluster_config['HeadNode']['Iam']['AdditionalIamPolicies'].append({'Policy': iam_policy_arn})
+
+        self.parallel_cluster_config['HeadNode']['Imds'] = self.parallel_cluster_config['HeadNode'].get('Imds', {})
+        self.parallel_cluster_config['HeadNode']['Imds']['Secured'] = self.config['slurm']['SlurmCtl'].get('Imds', {}).get('Secured', True)
+
+        self.parallel_cluster_config['HeadNode']['InstanceType'] = self.config['slurm']['SlurmCtl']['instance_type']
+
+        self.parallel_cluster_config['HeadNode']['Ssh'] = self.parallel_cluster_config['HeadNode'].get('Ssh', {})
+        self.parallel_cluster_config['HeadNode']['Ssh']['KeyName'] = self.parallel_cluster_config['HeadNode']['Ssh'].get('KeyName', self.config['SshKeyPair'])
+
+        if 'volume_size' in self.config['slurm']['SlurmCtl']:
+            self.parallel_cluster_config['HeadNode']['LocalStorage'] = self.parallel_cluster_config['HeadNode'].get('LocalStorage', {})
+            self.parallel_cluster_config['HeadNode']['LocalStorage']['RootVolume'] = self.parallel_cluster_config['HeadNode']['LocalStorage'].get('RootVolume', {})
+            self.parallel_cluster_config['HeadNode']['LocalStorage']['RootVolume'] = {
+                'Size': self.config['slurm']['SlurmCtl']['volume_size']
+            }
+
+        self.parallel_cluster_config['HeadNode']['Networking'] = self.parallel_cluster_config['HeadNode'].get('Networking', {})
+        self.parallel_cluster_config['HeadNode']['Networking']['SubnetId'] = self.config['SubnetId']
+
+        self.parallel_cluster_config['HeadNode']['Networking']['AdditionalSecurityGroups'] = self.parallel_cluster_config['HeadNode']['Networking'].get('AdditionalSecurityGroups', [])
+        self.parallel_cluster_config['HeadNode']['Networking']['AdditionalSecurityGroups'].append('{{SlurmCtlSecurityGroupId}}')
+        if 'AdditionalSecurityGroups' in self.config['slurm']['SlurmCtl']:
+            for security_group_id in self.config['slurm']['SlurmCtl']['AdditionalSecurityGroups']:
+                self.parallel_cluster_config['HeadNode']['Networking']['AdditionalSecurityGroups'].append(security_group_id)
+
+        self.parallel_cluster_config['HeadNode']['CustomActions'] = self.parallel_cluster_config['HeadNode'].get('CustomActions', {})
+        self.parallel_cluster_config['HeadNode']['CustomActions']['OnNodeStart'] = self.parallel_cluster_config['HeadNode']['CustomActions'].get('OnNodeStart', {'Sequence': []})
+        self.parallel_cluster_config['HeadNode']['CustomActions']['OnNodeStart']['Sequence'].append(
+            {
+                'Script': self.custom_action_s3_urls['config/bin/on_head_node_start.sh'],
+                'Args': []
+            }
+        )
+        self.parallel_cluster_config['HeadNode']['CustomActions']['OnNodeConfigured'] = self.parallel_cluster_config['HeadNode']['CustomActions'].get('OnNodeConfigured', {'Sequence': []})
+        self.parallel_cluster_config['HeadNode']['CustomActions']['OnNodeConfigured']['Sequence'].append(
+            {
+                'Script': self.custom_action_s3_urls['config/bin/on_head_node_configured.sh'],
+                'Args': []
+            }
+        )
+        self.parallel_cluster_config['HeadNode']['CustomActions']['OnNodeUpdated'] = self.parallel_cluster_config['HeadNode']['CustomActions'].get('OnNodeUpdated', {'Sequence': []})
+        self.parallel_cluster_config['HeadNode']['CustomActions']['OnNodeUpdated']['Sequence'].append(
+            {
+                'Script': self.custom_action_s3_urls['config/bin/on_head_node_updated.sh'],
+                'Args': []
+            }
+        )
+        self.parallel_cluster_config['Image'] = self.parallel_cluster_config.get('Image', {})
+        self.parallel_cluster_config['Image']['Os'] = self.config['slurm']['ParallelClusterConfig']['Image']['Os']
         if 'CustomAmi' in self.config['slurm']['ParallelClusterConfig']['Image']:
             self.parallel_cluster_config['Image']['CustomAmi'] = self.config['slurm']['ParallelClusterConfig']['Image']['CustomAmi']

-        if 'volume_size' in self.config['slurm']['SlurmCtl']:
-            self.parallel_cluster_config['HeadNode']['LocalStorage'] = {
-                'RootVolume': {
-                    'Size': self.config['slurm']['SlurmCtl']['volume_size']
-                }
-            }
+        self.parallel_cluster_config['Imds'] = self.parallel_cluster_config.get('Imds', {'ImdsSupport': 'v2.0'})
+
+        self.parallel_cluster_config['Region'] = self.cluster_region
+
+        self.parallel_cluster_config['Scheduling'] = self.parallel_cluster_config.get('Scheduling', {})
+        self.parallel_cluster_config['Scheduling']['Scheduler'] = 'slurm'
+
+        self.parallel_cluster_config['Scheduling']['SlurmQueues'] = self.parallel_cluster_config['Scheduling'].get('SlurmQueues', [])
+
+        self.parallel_cluster_config['Scheduling']['SlurmSettings'] = self.parallel_cluster_config['Scheduling'].get('SlurmSettings', {})
+        self.parallel_cluster_config['Scheduling']['SlurmSettings']['EnableMemoryBasedScheduling'] = self.parallel_cluster_config['Scheduling']['SlurmSettings'].get('EnableMemoryBasedScheduling', True)
+
+        self.parallel_cluster_config['Scheduling']['SlurmSettings']['CustomSlurmSettings'] = self.parallel_cluster_config['Scheduling']['SlurmSettings'].get('CustomSlurmSettings', [])
+        self.parallel_cluster_config['Scheduling']['SlurmSettings']['CustomSlurmSettings'] += [
+            {'AuthAltTypes': 'auth/jwt'},
+            {'AuthAltParameters': 'jwt_key=/opt/slurm/var/spool/jwt_hs256.key'},
+            {'FederationParameters': 'fed_display'},
+            # JobRequeue must be set to 1 to enable preemption to requeue jobs.
+            {'JobRequeue': 1},
+            # {'LaunchParameters': 'enable_nss_slurm'},
+            {'PreemptExemptTime': self.config['slurm']['SlurmCtl']['PreemptExemptTime']},
+            {'PreemptMode': self.config['slurm']['SlurmCtl']['PreemptMode']},
+            {'PreemptParameters': ','.join([
+                'reclaim_licenses',
+                'send_user_signal',
+                'strict_order',
+                'youngest_first',
+            ])},
+            {'PreemptType': self.config['slurm']['SlurmCtl']['PreemptType']},
+            {'PrologFlags': 'X11'},
+            {'SchedulerParameters': ','.join([
+                'batch_sched_delay=10',
+                'bf_continue',
+                'bf_interval=30',
+                'bf_licenses',
+                'bf_max_job_test=500',
+                'bf_max_job_user=0',
+                'bf_yield_interval=1000000',
+                'default_queue_depth=10000',
+                'max_rpc_cnt=100',
+                'nohold_on_prolog_fail',
+                'sched_min_internal=2000000',
+            ])},
+            {'ScronParameters': 'enable'},
+        ]
+
+        if self.munge_key_secret_arn:
+            self.parallel_cluster_config['Scheduling']['SlurmSettings']['MungeKeySecretArn'] = self.munge_key_secret_arn
+
+        self.parallel_cluster_config['Tags'] = self.parallel_cluster_config.get('Tags', [])
+        self.parallel_cluster_config['Tags'].append(
+            {
+                'Key': 'parallelcluster-ui',
+                'Value': 'true'
+            }
+        )

         if 'Database' in self.config['slurm']['ParallelClusterConfig']:
             for security_group_name, security_group_id in self.config['slurm']['ParallelClusterConfig']['Database']['ClientSecurityGroup'].items():
@@ -2619,6 +2596,12 @@ def create_parallel_cluster_config(self):
                 parallel_cluster_queue['Image'] = {
                     'CustomAmi': self.config['slurm']['ParallelClusterConfig']['ComputeNodeAmi']
                 }
+            if 'AdditionalSecurityGroups' in self.config['slurm']['InstanceConfig']:
+                for security_group_id in self.config['slurm']['InstanceConfig']['AdditionalSecurityGroups']:
+                    parallel_cluster_queue['Networking']['AdditionalSecurityGroups'].append(security_group_id)
+            if 'AdditionalIamPolicies' in self.config['slurm']['InstanceConfig']:
+                for iam_policy_arn in self.config['slurm']['InstanceConfig']['AdditionalIamPolicies']:
+                    parallel_cluster_queue['Iam']['AdditionalIamPolicies'].append({'Policy': iam_policy_arn})
             number_of_queues += 1

             # Give the compute node access to extra mounts
@@ -2747,6 +2730,12 @@ def create_parallel_cluster_config(self):
                 parallel_cluster_queue['Image'] = {
                     'CustomAmi': self.config['slurm']['ParallelClusterConfig']['ComputeNodeAmi']
                 }
+            if 'AdditionalSecurityGroups' in self.config['slurm']['InstanceConfig']:
+                for security_group_id in self.config['slurm']['InstanceConfig']['AdditionalSecurityGroups']:
+                    parallel_cluster_queue['Networking']['AdditionalSecurityGroups'].append(security_group_id)
+            if 'AdditionalIamPolicies' in self.config['slurm']['InstanceConfig']:
+                for iam_policy_arn in self.config['slurm']['InstanceConfig']['AdditionalIamPolicies']:
+                    parallel_cluster_queue['Iam']['AdditionalIamPolicies'].append({'Policy': iam_policy_arn})

             # Give the compute node access to extra mounts
             for fs_type in self.extra_mount_security_groups.keys():
diff --git a/source/cdk/config_schema.py b/source/cdk/config_schema.py
index ec2c22ee..a43a0b1a 100644
--- a/source/cdk/config_schema.py
+++ b/source/cdk/config_schema.py
@@ -364,7 +364,7 @@ def get_config_schema(config):
         # Enable Cloudformation Stack termination protection
         Optional('termination_protection', default=True): bool,
         # Optional so can be specified on the command-line
-        Optional('StackName', default='slurm-top'): str,
+        Optional('StackName', default='slurm-config'): str,
         # Optional so can be specified on the command-line
         Optional('Region'): And(str, lambda s: s in valid_regions),
         # Optional so can be specified on the command-line
@@ -384,6 +384,7 @@ def get_config_schema(config):
             Optional('ParallelClusterConfig'): {
                 Optional('Enable', default=True): And(bool, lambda s: s == True),
                 'Version': And(str, lambda version: version in PARALLEL_CLUSTER_VERSIONS, lambda version: parse_version(version) >= MIN_PARALLEL_CLUSTER_VERSION),
+                Optional('ClusterConfig'): lambda s: True,
                 Optional('Image', default={'Os': DEFAULT_OS(config)}): {
                     'Os': And(str, lambda s: s in PARALLEL_CLUSTER_ALLOWED_OSES),
                     # CustomAmi: AMI to use for head and compute nodes instead of the pre-built AMIs.
@@ -397,7 +398,6 @@ def get_config_schema(config):
                 Optional('EnableEfa', default=False): bool,
                 Optional('Database'): {
                     Optional('DatabaseStackName'): str,
-                    Optional('EdaSlurmClusterStackName'): str,
                     Optional('FQDN'): str,
                     Optional('Port'): int,
                     Optional('AdminUserName'): str,
@@ -405,19 +405,23 @@ def get_config_schema(config):
                     Optional('ClientSecurityGroup'): {str: And(str, lambda s: re.match('sg-', s))},
                 },
                 Optional('Dcv', default={}): {
-                    Optional('Enable', default=False): bool,
+                    Optional('Enabled', default=False): bool,
                     Optional('Port', default=8443): int,
-                    Optional('AllowedIps'): str # Can't set a default without know the VPC's CIDR range.
+                    Optional('AllowedIps'): str # Can't set a default without knowing the VPC's CIDR range.
                 },
                 Optional('LoginNodes'): {
                     'Pools': [
                         {
                             'Name': str,
+                            'Count': int,
+                            'InstanceType': str,
+                            Optional('GracetimePeriod'): And(int, lambda s: s > 0 and s <= 120), # optional, default value: 60 mins (max 120 mins)
                             Optional('Image'): {
                                 'CustomAmi': And(str, lambda s: s.startswith('ami-'))
                             },
-                            'Count': int,
-                            'InstanceType': str,
+                            Optional('Ssh'): {
+                                'KeyName': str # default value: same ssh key used for the Head Node
+                            },
                             Optional('Networking'): {
                                 Optional('SubnetIds'): [ # Only 1 subnet supported for the MVP. Default to slurm subnet
                                     And(str, lambda s: s.startswith('subnet-'))
@@ -429,9 +433,6 @@ def get_config_schema(config):
                                     And(str, lambda s: s.startswith('sg-'))
                                 ],
                             },
-                            Optional('Ssh'): {
-                                'KeyName': str # default value: same ssh key used for the Head Node
-                            },
                             Optional('Iam'): {
                                 'InstanceRole': str,
                                 'InstanceProfile': str,
@@ -439,7 +440,6 @@ def get_config_schema(config):
                                     {'Policy': str}
                                 ]
                             },
-                            Optional('GracetimePeriod'): And(int, lambda s: s > 0 and s <= 120) # optional, default value: 60 mins (max 120 mins)
                         }
                     ]
                 }
@@ -476,6 +476,15 @@ def get_config_schema(config):
                 Optional('SlurmConfOverrides'): str,
                 Optional('SlurmrestdUid', default=901): int,
                 Optional('SlurmRestApiVersion', default=get_slurm_rest_api_version(config)): str,
+                Optional('AdditionalSecurityGroups'): [
+                    And(str, lambda s: s.startswith('sg-'))
+                ],
+                Optional('AdditionalIamPolicies'): [
+                    str
+                ],
+                Optional('Imds', default={'Secured': True}): {
+                    Optional('Secured', default=True): bool
+                }
             },
             #
             # SubmitterSecurityGroupIds:
@@ -523,6 +532,12 @@ def get_config_schema(config):
                         }
                     }
                 },
+                Optional('AdditionalSecurityGroups'): [
+                    And(str, lambda s: s.startswith('sg-'))
+                ],
+                Optional('AdditionalIamPolicies'): [
+                    str
+                ],
                 Optional('OnPremComputeNodes'): {
                     'ConfigFile': str,
                     'CIDR': str,