diff --git a/.gitignore b/.gitignore
index 68bc17f..27bcb8f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -158,3 +158,6 @@ cython_debug/
 # and can be added to the global gitignore or merged into this file. For a more nuclear
 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
+
+# CDK
+cdk.out
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..e36ed75
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,29 @@
+repos:
+  - repo: https://github.com/psf/black
+    rev: 22.12.0
+    hooks:
+      - id: black
+        language_version: python
+
+  - repo: https://github.com/PyCQA/isort
+    rev: 5.12.0
+    hooks:
+      - id: isort
+        language_version: python
+        args: ["-m", "3", "--trailing-comma", "-l", "88"]
+
+  - repo: https://github.com/charliermarsh/ruff-pre-commit
+    rev: v0.0.238
+    hooks:
+      - id: ruff
+        args: ["--fix"]
+
+  - repo: https://github.com/pre-commit/mirrors-mypy
+    rev: v0.991
+    hooks:
+      - id: mypy
+        language_version: python
+        additional_dependencies:
+          - types-requests
+          - types-attrs
+          - types-PyYAML
\ No newline at end of file
diff --git a/app.py b/app.py
new file mode 100644
index 0000000..d2f2df1
--- /dev/null
+++ b/app.py
@@ -0,0 +1,46 @@
+import yaml
+from aws_cdk import App
+
+from cdk_eoapi import pgStacInfra, vpc
+from config import Config
+
+app = App()
+
+try:
+    with open("config.yaml") as f:
+        config = yaml.safe_load(f)
+        config = (
+            {} if config is None else config
+        )  # if the config file is empty, treat it as an empty dict
+        config = Config(**config)
+except FileNotFoundError:
+    # if no config file is found at the expected path, fall back to the defaults
+    config = Config()
+
+vpc_stack = vpc.VpcStack(
+    tags=config.tags,
+    scope=app,
+    id=config.build_service_name("pgSTAC-vpc"),
+    nat_gateway_count=config.nat_gateway_count,
+)
+
+
+pgstac_infra_stack = pgStacInfra.pgStacInfraStack(
+    scope=app,
+    tags=config.tags,
+    id=config.build_service_name("pgSTAC-infra"),
+    vpc=vpc_stack.vpc,
+    stac_api_lambda_name=config.build_service_name("STAC API"),
+    titiler_pgstac_api_lambda_name=config.build_service_name("titiler pgSTAC API"),
+    stage=config.stage,
+    db_allocated_storage=config.db_allocated_storage,
+    public_db_subnet=config.public_db_subnet,
+    db_instance_type=config.db_instance_type,
+    bastion_host_allow_ip_list=config.bastion_host_allow_ip_list,
+    bastion_host_create_elastic_ip=config.bastion_host_create_elastic_ip,
+    bastion_host_user_data=yaml.dump(config.bastion_host_user_data),
+    titiler_buckets=config.titiler_buckets,
+    data_access_role_arn=config.data_access_role_arn,
+    auth_provider_jwks_url=config.auth_provider_jwks_url,
+)
+app.synth()
diff --git a/cdk.json b/cdk.json
new file mode 100644
index 0000000..d9313bf
--- /dev/null
+++ b/cdk.json
@@ -0,0 +1,32 @@
+{
+  "app": "python3 app.py",
+  "watch": {
+    "include": [
+      "**"
+    ],
+    "exclude": [
+      "README.md",
+      "cdk*.json",
+      "requirements*.txt",
+      "source.bat",
+      "**/*.pyc",
+      "**/*.tmp",
+      "**/__pycache__",
+      "tests",
+      "scripts",
+      "*venv"
+    ]
+  },
+  "context": {
+    "@aws-cdk/aws-apigateway:usagePlanKeyOrderInsensitiveId": true,
+    "@aws-cdk/core:stackRelativeExports": true,
+    "@aws-cdk/aws-rds:lowercaseDbIdentifier": true,
+    "@aws-cdk/aws-lambda:recognizeVersionProps": true,
+    "@aws-cdk/aws-cloudfront:defaultSecurityPolicyTLSv1.2_2021": true,
+    "@aws-cdk-containers/ecs-service-extensions:enableDefaultLogDriver": true,
+    "@aws-cdk/core:target-partitions": [
+      "aws",
+      "aws-cn"
+    ]
+  }
+}
\ No newline at end of file
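Note on app.py above: configuration is resolved by loading config.yaml when it exists and falling back to Config() defaults otherwise. The sketch below restates that load-or-default pattern as a standalone snippet, assuming config.py (added later in this diff) is importable; the override values and the __main__ demo are purely illustrative.

import yaml

from config import Config


def load_config(path: str = "config.yaml") -> Config:
    # Mirrors app.py: a missing or empty file falls back to Config() defaults.
    try:
        with open(path) as f:
            data = yaml.safe_load(f) or {}
    except FileNotFoundError:
        data = {}
    return Config(**data)


if __name__ == "__main__":
    # Any field defined on Config can be overridden this way.
    cfg = Config(project_id="my-eoapi", stage="dev")
    print(cfg.build_service_name("pgSTAC-vpc"))  # -> my-eoapi-dev-pgSTAC-vpc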
diff --git a/cdk_eoapi/__init__.py b/cdk_eoapi/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/cdk_eoapi/pgStacInfra.py b/cdk_eoapi/pgStacInfra.py
new file mode 100644
index 0000000..6d7f0ae
--- /dev/null
+++ b/cdk_eoapi/pgStacInfra.py
@@ -0,0 +1,187 @@
+from typing import Optional, Union
+
+import boto3
+from aws_cdk import Stack, aws_ec2, aws_iam, aws_rds
+from cdk_pgstac import (
+    BastionHost,
+    PgStacApiLambda,
+    PgStacDatabase,
+    StacIngestor,
+    TitilerPgstacApiLambda,
+)
+from constructs import Construct
+
+
+class pgStacInfraStack(Stack):
+    def __init__(
+        self,
+        scope: Construct,
+        id: str,
+        vpc: aws_ec2.Vpc,
+        stage: str,
+        db_allocated_storage: int,
+        public_db_subnet: bool,
+        db_instance_type: str,
+        stac_api_lambda_name: str,
+        titiler_pgstac_api_lambda_name: str,
+        bastion_host_allow_ip_list: list,
+        bastion_host_create_elastic_ip: bool,
+        titiler_buckets: list,
+        data_access_role_arn: Optional[str],
+        auth_provider_jwks_url: Optional[str],
+        bastion_host_user_data: Union[str, aws_ec2.UserData],
+        **kwargs,
+    ) -> None:
+        super().__init__(scope, id, **kwargs)
+
+        pgstac_db = PgStacDatabase(
+            self,
+            "pgstac-db",
+            vpc=vpc,
+            engine=aws_rds.DatabaseInstanceEngine.postgres(
+                version=aws_rds.PostgresEngineVersion.VER_14
+            ),
+            vpc_subnets=aws_ec2.SubnetSelection(
+                subnet_type=aws_ec2.SubnetType.PUBLIC
+                if public_db_subnet
+                else aws_ec2.SubnetType.PRIVATE_ISOLATED
+            ),
+            allocated_storage=db_allocated_storage,
+            instance_type=aws_ec2.InstanceType(db_instance_type),
+        )
+
+        stac_api_lambda = PgStacApiLambda(
+            self,
+            "pgstac-api",
+            api_env={"NAME": stac_api_lambda_name, "description": f"{stage} STAC API"},
+            vpc=vpc,
+            db=pgstac_db.db,
+            db_secret=pgstac_db.pgstac_secret,
+            subnet_selection=aws_ec2.SubnetSelection(
+                subnet_type=aws_ec2.SubnetType.PUBLIC
+                if public_db_subnet
+                else aws_ec2.SubnetType.PRIVATE_WITH_EGRESS
+            ),
+        )
+
+        TitilerPgstacApiLambda(
+            self,
+            "titiler-pgstac-api",
+            api_env={
+                "NAME": titiler_pgstac_api_lambda_name,
+                "description": f"{stage} titiler pgstac API",
+            },
+            vpc=vpc,
+            db=pgstac_db.db,
+            db_secret=pgstac_db.pgstac_secret,
+            subnet_selection=aws_ec2.SubnetSelection(
+                subnet_type=aws_ec2.SubnetType.PUBLIC
+                if public_db_subnet
+                else aws_ec2.SubnetType.PRIVATE_WITH_EGRESS
+            ),
+            buckets=titiler_buckets,
+        )
+
+        BastionHost(
+            self,
+            "bastion-host",
+            vpc=vpc,
+            db=pgstac_db.db,
+            ipv4_allowlist=bastion_host_allow_ip_list,
+            user_data=aws_ec2.UserData.custom(bastion_host_user_data)
+            if bastion_host_user_data
+            else aws_ec2.UserData.for_linux(),
+            create_elastic_ip=bastion_host_create_elastic_ip,
+        )
+
+        if data_access_role_arn:
+            # importing the provided role from its ARN.
+            # the stac ingestor will try to assume it when called,
+            # so it must be listed in the data access role trust policy.
+            data_access_role = aws_iam.Role.from_role_arn(
+                self,
+                "data-access-role",
+                role_arn=data_access_role_arn,
+            )
+        else:
+            data_access_role = self._create_data_access_role()
+
+        stac_ingestor_env = {"REQUESTER_PAYS": "True"}
+
+        if auth_provider_jwks_url:
+            stac_ingestor_env["JWKS_URL"] = auth_provider_jwks_url
+
+        stac_ingestor = StacIngestor(
+            self,
+            "stac-ingestor",
+            stac_url=stac_api_lambda.url,
+            stage=stage,
+            vpc=vpc,
+            data_access_role=data_access_role,
+            stac_db_secret=pgstac_db.pgstac_secret,
+            stac_db_security_group=pgstac_db.db.connections.security_groups[0],
+            subnet_selection=aws_ec2.SubnetSelection(
+                subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_EGRESS
+            ),
+            api_env=stac_ingestor_env,
+        )
+
+        # We can only modify the role's trust policy if the role was created
+        # here. If an existing role is injected, its trust relationship must
+        # already be set up, or be set up after this deployment.
+        if not data_access_role_arn:
+            data_access_role = self._grant_assume_role_with_principal_pattern(
+                data_access_role, stac_ingestor.handler_role.role_name
+            )
+
+    def _create_data_access_role(self) -> aws_iam.Role:
+
+        """
+        Creates an IAM role with full S3 read access.
+        """
+
+        data_access_role = aws_iam.Role(
+            self,
+            "data-access-role",
+            assumed_by=aws_iam.ServicePrincipal("lambda.amazonaws.com"),
+        )
+
+        data_access_role.add_to_policy(
+            aws_iam.PolicyStatement(
+                actions=[
+                    "s3:Get*",
+                ],
+                resources=["*"],
+                effect=aws_iam.Effect.ALLOW,
+            )
+        )
+        return data_access_role
+
+    def _grant_assume_role_with_principal_pattern(
+        self,
+        role_to_assume: aws_iam.Role,
+        principal_pattern: str,
+        account_id: str = boto3.client("sts").get_caller_identity().get("Account"),
+    ) -> aws_iam.Role:
+        """
+        Grants permission to assume this role to any role in the
+        given account whose name matches the given pattern. The
+        default account is the current account.
+        """
+
+        role_to_assume.assume_role_policy.add_statements(
+            aws_iam.PolicyStatement(
+                effect=aws_iam.Effect.ALLOW,
+                principals=[aws_iam.AnyPrincipal()],
+                actions=["sts:AssumeRole"],
+                conditions={
+                    "StringLike": {
+                        "aws:PrincipalArn": [
+                            f"arn:aws:iam::{account_id}:role/{principal_pattern}"
+                        ]
+                    }
+                },
+            )
+        )
+
+        return role_to_assume
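Note on pgStacInfra.py above: the trust relationship added by _grant_assume_role_with_principal_pattern can be hard to read off the CDK calls. As a reading aid, the dict below shows the approximate shape of the resulting trust-policy statement; the account ID and the handler-role name are placeholders, not values produced by this stack.

# Approximate trust-policy statement attached to the data access role.
# 123456789012 and the role name below are placeholders.
trust_statement = {
    "Effect": "Allow",
    "Principal": {"AWS": "*"},  # aws_iam.AnyPrincipal()
    "Action": "sts:AssumeRole",
    "Condition": {
        "StringLike": {
            "aws:PrincipalArn": [
                "arn:aws:iam::123456789012:role/<stac-ingestor-handler-role-name>"
            ]
        }
    },
}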
diff --git a/cdk_eoapi/vpc.py b/cdk_eoapi/vpc.py
new file mode 100644
index 0000000..fdc97ff
--- /dev/null
+++ b/cdk_eoapi/vpc.py
@@ -0,0 +1,50 @@
+from aws_cdk import Stack, aws_ec2
+from constructs import Construct
+
+
+class VpcStack(Stack):
+    def __init__(
+        self, scope: Construct, id: str, nat_gateway_count: int, **kwargs
+    ) -> None:
+        super().__init__(scope, id, **kwargs)
+
+        self.vpc = aws_ec2.Vpc(
+            self,
+            "vpc",
+            subnet_configuration=[
+                aws_ec2.SubnetConfiguration(
+                    name="ingress", subnet_type=aws_ec2.SubnetType.PUBLIC, cidr_mask=24
+                ),
+                aws_ec2.SubnetConfiguration(
+                    name="application",
+                    subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_EGRESS,
+                    cidr_mask=24,
+                ),
+                aws_ec2.SubnetConfiguration(
+                    name="rds",
+                    subnet_type=aws_ec2.SubnetType.PRIVATE_ISOLATED,
+                    cidr_mask=24,
+                ),
+            ],
+            nat_gateways=nat_gateway_count,
+        )
+
+        self.vpc.add_gateway_endpoint(
+            "DynamoDbEndpoint", service=aws_ec2.GatewayVpcEndpointAwsService.DYNAMODB
+        )
+
+        self.vpc.add_interface_endpoint(
+            "SecretsManagerEndpoint",
+            service=aws_ec2.InterfaceVpcEndpointAwsService.SECRETS_MANAGER,
+        )
+
+        self.export_value(
+            self.vpc.select_subnets(subnet_type=aws_ec2.SubnetType.PUBLIC)
+            .subnets[0]
+            .subnet_id
+        )
+        self.export_value(
+            self.vpc.select_subnets(subnet_type=aws_ec2.SubnetType.PUBLIC)
+            .subnets[1]
+            .subnet_id
+        )
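Note on vpc.py above: the stack can be synthesized on its own, independently of the pgSTAC stack. The snippet below is a minimal, illustrative sketch (the stack id and NAT gateway count are arbitrary); app.py remains the canonical entry point.

from aws_cdk import App

from cdk_eoapi import vpc

app = App()

# Creates the three subnet tiers defined above: public "ingress",
# private-with-egress "application", and isolated "rds".
network = vpc.VpcStack(scope=app, id="demo-vpc", nat_gateway_count=1)

app.synth()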
diff --git a/config.py b/config.py
new file mode 100644
index 0000000..6bca84d
--- /dev/null
+++ b/config.py
@@ -0,0 +1,95 @@
+from typing import Any, Dict, List, Optional, Union
+
+import pydantic
+from aws_cdk import aws_ec2
+from pydantic_core.core_schema import FieldValidationInfo
+from pydantic_settings import BaseSettings
+
+DEFAULT_PROJECT_ID = "cdk-eoapi-demo"
+DEFAULT_STAGE = "test"
+DEFAULT_NAT_GATEWAY_COUNT = 1
+
+
+class Config(BaseSettings):
+    project_id: Optional[str] = pydantic.Field(
+        description="Project ID", default=DEFAULT_PROJECT_ID
+    )
+    stage: Optional[str] = pydantic.Field(
+        description="Stage of deployment", default=DEFAULT_STAGE
+    )
+    tags: Optional[Dict[str, str]] = pydantic.Field(
+        description="""Tags to apply to resources. If none provided,
+        the values produced by `default_tags` will be used.
+        Note that if tags are passed to the CDK CLI via `--tags`,
+        they will override any tags defined here.""",
+        default=None,
+    )
+    auth_provider_jwks_url: Optional[str] = pydantic.Field(
+        description="""Auth Provider JSON Web Key Set URL for
+        ingestion authentication. If not provided,
+        no authentication will be required.""",
+        default=None,
+    )
+    data_access_role_arn: Optional[str] = pydantic.Field(
+        description="""Role ARN for data access that will be
+        used by the STAC ingestor for validation of assets
+        located in S3 and for the tiler application to access
+        assets located in S3. If none, the role will be
+        created at runtime with full S3 read access. If
+        provided, the existing role must be configured to
+        allow the tiler and STAC ingestor lambda roles to
+        assume it. See https://github.com/developmentseed/cdk-pgstac""",
+        default=None,
+    )
+    db_instance_type: Optional[str] = pydantic.Field(
+        description="Database instance type", default="t3.micro"
+    )
+    db_allocated_storage: Optional[int] = pydantic.Field(
+        description="Allocated storage for the database", default=5
+    )
+    public_db_subnet: Optional[bool] = pydantic.Field(
+        description="Whether to put the database in a public subnet", default=False
+    )
+    nat_gateway_count: Optional[int] = pydantic.Field(
+        description="Number of NAT gateways to create",
+        default=DEFAULT_NAT_GATEWAY_COUNT,
+    )
+    bastion_host_create_elastic_ip: Optional[bool] = pydantic.Field(
+        description="Whether to create an elastic IP for the bastion host",
+        default=False,
+    )
+    bastion_host_allow_ip_list: Optional[List[str]] = pydantic.Field(
+        description="""List of IP addresses that are allowed
+        SSH access to the bastion host""",
+        default=[],
+    )
+    bastion_host_user_data: Optional[
+        Union[Dict[str, Any], aws_ec2.UserData]
+    ] = pydantic.Field(
+        description="User data for the bastion host",
+        default=aws_ec2.UserData.for_linux(),
+    )
+    titiler_buckets: Optional[List[str]] = pydantic.Field(
+        description="""List of buckets the titiler API
+        is granted access to""",
+        default=[],
+    )
+
+    @pydantic.field_validator("tags")
+    def default_tags(cls, v, info: FieldValidationInfo):
+        return v or {"project_id": info.data["project_id"], "stage": info.data["stage"]}
+
+    @pydantic.field_validator("nat_gateway_count")
+    def validate_nat_gateway_count(cls, v, info: FieldValidationInfo):
+        if not info.data["public_db_subnet"] and v <= 0:
+            raise ValueError(
+                """if the database and its associated service instances
                are to be located in the private subnet of the VPC, NAT
                gateways are needed to allow egress from the services
                and therefore `nat_gateway_count` has to be > 0."""
+            )
+        else:
+            return v
+
+    def build_service_name(self, service_id: str) -> str:
+        return f"{self.project_id}-{self.stage}-{service_id}"
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..9227b3d
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,15 @@
+aws-cdk-lib>=2.75.0
+aws_cdk.aws_cognito_identitypool_alpha>=2.75.0a0
+cdk-pgstac==4.2.3
+constructs>=10.0.0,<11.0.0
+pydantic==2.0.2
+pydantic-settings==2.0.1
+black==22.3.0
+boto3==1.24.15
+boto3-stubs[cognito-idp,cognito-identity]
+flake8==4.0.1
+click==8.1.3
+requests==2.28.0
+python-dotenv==1.0.0
+pyyaml==6.0
+types-PyYAML==6.0.12.10
\ No newline at end of file
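Closing note on config.py: the two field validators carry most of the behaviour that is easy to get wrong when filling in config.yaml. The sketch below shows how they react to a few inputs, assuming config.py is importable; all values are illustrative.

from config import Config

# Default tags are derived from project_id and stage when none are supplied.
cfg = Config(project_id="demo", stage="dev")
assert cfg.tags == {"project_id": "demo", "stage": "dev"}

# A public database subnet does not require NAT gateways...
Config(public_db_subnet=True, nat_gateway_count=0)

# ...but private placement with zero NAT gateways is rejected.
try:
    Config(public_db_subnet=False, nat_gateway_count=0)
except ValueError as exc:  # pydantic's ValidationError subclasses ValueError
    print(exc)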