From 0a32caf790b2094dfa69317059a46c4d801d93ea Mon Sep 17 00:00:00 2001
From: Emile Tenezakis
Date: Tue, 11 Jul 2023 17:03:39 -0700
Subject: [PATCH] Initial PR : Create a public demonstration repository named cdk-eoapi (#1)

* initial commit

* format, add pre-commit, add comments regarding the need for the trust relationship to be manually established if the data access role is injected, and if not injected move the creation of the trust relationship policy to after the stac ingestor creation to be able to use the exact arn

* add format github action based on pre-commitgs

* try removing a file to check if pre commit ci is running

* avoid using dict

* add tags as an option and default to project id and stage

* add docker ignore file

* fix docs

* docs for data access role

* bump pydantic, add nat gateway count validator in config

* refer to cdk-pgstac docs for data access role if pre-configured
---
 .gitignore               |   3 +
 .pre-commit-config.yaml  |  29 ++++++
 app.py                   |  53 ++++++++++
 cdk.json                 |  32 ++++++
 cdk_eoapi/__init__.py    |   0
 cdk_eoapi/pgStacInfra.py | 203 +++++++++++++++++++++++++++++++++++++++
 cdk_eoapi/vpc.py         |  50 ++++++++++
 config.py                | 109 +++++++++++++++++++++
 requirements.txt         |  15 +++
 9 files changed, 494 insertions(+)
 create mode 100644 .pre-commit-config.yaml
 create mode 100644 app.py
 create mode 100644 cdk.json
 create mode 100644 cdk_eoapi/__init__.py
 create mode 100644 cdk_eoapi/pgStacInfra.py
 create mode 100644 cdk_eoapi/vpc.py
 create mode 100644 config.py
 create mode 100644 requirements.txt

diff --git a/.gitignore b/.gitignore
index 68bc17f..27bcb8f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -158,3 +158,6 @@ cython_debug/
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
+
+# CDK
+cdk.out
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..e36ed75
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,29 @@
+repos:
+  - repo: https://github.com/psf/black
+    rev: 22.12.0
+    hooks:
+      - id: black
+        language_version: python
+
+  - repo: https://github.com/PyCQA/isort
+    rev: 5.12.0
+    hooks:
+      - id: isort
+        language_version: python
+        args: ["-m", "3","--trailing-comma", "-l", "88"]
+
+  - repo: https://github.com/charliermarsh/ruff-pre-commit
+    rev: v0.0.238
+    hooks:
+      - id: ruff
+        args: ["--fix"]
+
+  - repo: https://github.com/pre-commit/mirrors-mypy
+    rev: v0.991
+    hooks:
+      - id: mypy
+        language_version: python
+        additional_dependencies:
+          - types-requests
+          - types-attrs
+          - types-PyYAML
\ No newline at end of file
diff --git a/app.py b/app.py
new file mode 100644
index 0000000..d2f2df1
--- /dev/null
+++ b/app.py
@@ -0,0 +1,53 @@
+import yaml
+from aws_cdk import App
+
+from cdk_eoapi import pgStacInfra, vpc
+from config import Config
+
+app = App()
+
+try:
+    with open("config.yaml") as f:
+        config = yaml.safe_load(f)
+        config = (
+            {} if config is None else config
+        )  # if config is empty, set it to an empty dict
+        config = Config(**config)
+except FileNotFoundError:
+    # if no config at the expected path, using defaults
+    config = Config()
+
+# Only a mapping read from the config file has to be serialized to text;
+# the default value is already an `aws_ec2.UserData` object and is
+# handed to the stack unchanged.
+bastion_host_user_data = config.bastion_host_user_data
+if isinstance(bastion_host_user_data, dict):
+    bastion_host_user_data = yaml.dump(bastion_host_user_data)
+
+vpc_stack = vpc.VpcStack(
+    tags=config.tags,
+    scope=app,
+    id=config.build_service_name("pgSTAC-vpc"),
+    nat_gateway_count=config.nat_gateway_count,
+)
+
+
+pgstac_infra_stack = pgStacInfra.pgStacInfraStack(
+    scope=app,
+    tags=config.tags,
+    id=config.build_service_name("pgSTAC-infra"),
+    vpc=vpc_stack.vpc,
+    stac_api_lambda_name=config.build_service_name("STAC API"),
+    titiler_pgstac_api_lambda_name=config.build_service_name("titiler pgSTAC API"),
+    stage=config.stage,
+    db_allocated_storage=config.db_allocated_storage,
+    public_db_subnet=config.public_db_subnet,
+    db_instance_type=config.db_instance_type,
+    bastion_host_allow_ip_list=config.bastion_host_allow_ip_list,
+    bastion_host_create_elastic_ip=config.bastion_host_create_elastic_ip,
+    bastion_host_user_data=bastion_host_user_data,
+    titiler_buckets=config.titiler_buckets,
+    data_access_role_arn=config.data_access_role_arn,
+    auth_provider_jwks_url=config.auth_provider_jwks_url,
+)
+app.synth()
diff --git a/cdk.json b/cdk.json
new file mode 100644
index 0000000..d9313bf
--- /dev/null
+++ b/cdk.json
@@ -0,0 +1,32 @@
+{
+  "app": "python3 app.py",
+  "watch": {
+    "include": [
+      "**"
+    ],
+    "exclude": [
+      "README.md",
+      "cdk*.json",
+      "requirements*.txt",
+      "source.bat",
+      "**/*.pyc",
+      "**/*.tmp",
+      "**/__pycache__",
+      "tests",
+      "scripts",
+      "*venv"
+    ]
+  },
+  "context": {
+    "@aws-cdk/aws-apigateway:usagePlanKeyOrderInsensitiveId": true,
+    "@aws-cdk/core:stackRelativeExports": true,
+    "@aws-cdk/aws-rds:lowercaseDbIdentifier": true,
+    "@aws-cdk/aws-lambda:recognizeVersionProps": true,
+    "@aws-cdk/aws-cloudfront:defaultSecurityPolicyTLSv1.2_2021": true,
+    "@aws-cdk-containers/ecs-service-extensions:enableDefaultLogDriver": true,
+    "@aws-cdk/core:target-partitions": [
+      "aws",
+      "aws-cn"
+    ]
+  }
+}
\ No newline at end of file
diff --git a/cdk_eoapi/__init__.py b/cdk_eoapi/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/cdk_eoapi/pgStacInfra.py b/cdk_eoapi/pgStacInfra.py
new file mode 100644
index 0000000..6d7f0ae
--- /dev/null
+++ b/cdk_eoapi/pgStacInfra.py
@@ -0,0 +1,203 @@
+from typing import Optional, Union
+
+from aws_cdk import Stack, aws_ec2, aws_iam, aws_rds
+from cdk_pgstac import (
+    BastionHost,
+    PgStacApiLambda,
+    PgStacDatabase,
+    StacIngestor,
+    TitilerPgstacApiLambda,
+)
+from constructs import Construct
+
+
+class pgStacInfraStack(Stack):
+    """
+    Stack assembling the cdk-pgstac constructs in the provided VPC:
+    the pgSTAC database, the STAC and titiler-pgstac API lambdas,
+    a bastion host and the STAC ingestor.
+    """
+
+    def __init__(
+        self,
+        scope: Construct,
+        id: str,
+        vpc: aws_ec2.Vpc,
+        stage: str,
+        db_allocated_storage: int,
+        public_db_subnet: bool,
+        db_instance_type: str,
+        stac_api_lambda_name: str,
+        titiler_pgstac_api_lambda_name: str,
+        bastion_host_allow_ip_list: list,
+        bastion_host_create_elastic_ip: bool,
+        titiler_buckets: list,
+        data_access_role_arn: Optional[str],
+        auth_provider_jwks_url: Optional[str],
+        bastion_host_user_data: Union[str, aws_ec2.UserData],
+        **kwargs,
+    ) -> None:
+        super().__init__(scope, id, **kwargs)
+
+        pgstac_db = PgStacDatabase(
+            self,
+            "pgstac-db",
+            vpc=vpc,
+            engine=aws_rds.DatabaseInstanceEngine.postgres(
+                version=aws_rds.PostgresEngineVersion.VER_14
+            ),
+            vpc_subnets=aws_ec2.SubnetSelection(
+                subnet_type=aws_ec2.SubnetType.PUBLIC
+                if public_db_subnet
+                else aws_ec2.SubnetType.PRIVATE_ISOLATED
+            ),
+            allocated_storage=db_allocated_storage,
+            instance_type=aws_ec2.InstanceType(db_instance_type),
+        )
+
+        stac_api_lambda = PgStacApiLambda(
+            self,
+            "pgstac-api",
+            api_env={"NAME": stac_api_lambda_name, "description": f"{stage} STAC API"},
+            vpc=vpc,
+            db=pgstac_db.db,
+            db_secret=pgstac_db.pgstac_secret,
+            subnet_selection=aws_ec2.SubnetSelection(
+                subnet_type=aws_ec2.SubnetType.PUBLIC
+                if public_db_subnet
+                else aws_ec2.SubnetType.PRIVATE_WITH_EGRESS
+            ),
+        )
+
+        TitilerPgstacApiLambda(
+            self,
+            "titiler-pgstac-api",
+            api_env={
+                "NAME": titiler_pgstac_api_lambda_name,
+                "description": f"{stage} titiler pgstac API",
+            },
+            vpc=vpc,
+            db=pgstac_db.db,
+            db_secret=pgstac_db.pgstac_secret,
+            subnet_selection=aws_ec2.SubnetSelection(
+                subnet_type=aws_ec2.SubnetType.PUBLIC
+                if public_db_subnet
+                else aws_ec2.SubnetType.PRIVATE_WITH_EGRESS
+            ),
+            buckets=titiler_buckets,
+        )
+
+        # the user data may be a ready-made `UserData` object or raw text;
+        # an empty value falls back to the default Linux user data.
+        if isinstance(bastion_host_user_data, aws_ec2.UserData):
+            bastion_user_data = bastion_host_user_data
+        elif bastion_host_user_data:
+            bastion_user_data = aws_ec2.UserData.custom(bastion_host_user_data)
+        else:
+            bastion_user_data = aws_ec2.UserData.for_linux()
+
+        BastionHost(
+            self,
+            "bastion-host",
+            vpc=vpc,
+            db=pgstac_db.db,
+            ipv4_allowlist=bastion_host_allow_ip_list,
+            user_data=bastion_user_data,
+            create_elastic_ip=bastion_host_create_elastic_ip,
+        )
+
+        if data_access_role_arn:
+            # importing provided role from arn.
+            # the stac ingestor will try to assume it when called,
+            # so it must be listed in the data access role trust policy.
+            data_access_role = aws_iam.Role.from_role_arn(
+                self,
+                "data-access-role",
+                role_arn=data_access_role_arn,
+            )
+        else:
+            data_access_role = self._create_data_access_role()
+
+        stac_ingestor_env = {"REQUESTER_PAYS": "True"}
+
+        if auth_provider_jwks_url:
+            stac_ingestor_env["JWKS_URL"] = auth_provider_jwks_url
+
+        stac_ingestor = StacIngestor(
+            self,
+            "stac-ingestor",
+            stac_url=stac_api_lambda.url,
+            stage=stage,
+            vpc=vpc,
+            data_access_role=data_access_role,
+            stac_db_secret=pgstac_db.pgstac_secret,
+            stac_db_security_group=pgstac_db.db.connections.security_groups[0],
+            subnet_selection=aws_ec2.SubnetSelection(
+                subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_EGRESS
+            ),
+            api_env=stac_ingestor_env,
+        )
+
+        # we can only do that if the role is created here.
+        # If injecting a role, that role's trust relationship
+        # must be already set up, or set up after this deployment.
+        if not data_access_role_arn:
+            self._grant_assume_role_with_principal_pattern(
+                data_access_role, stac_ingestor.handler_role.role_name
+            )
+
+    def _create_data_access_role(self) -> aws_iam.Role:
+        """
+        Creates an IAM role, assumable by the Lambda service,
+        that allows `s3:Get*` on all resources.
+        """
+
+        data_access_role = aws_iam.Role(
+            self,
+            "data-access-role",
+            assumed_by=aws_iam.ServicePrincipal("lambda.amazonaws.com"),
+        )
+
+        data_access_role.add_to_policy(
+            aws_iam.PolicyStatement(
+                actions=[
+                    "s3:Get*",
+                ],
+                resources=["*"],
+                effect=aws_iam.Effect.ALLOW,
+            )
+        )
+        return data_access_role
+
+    def _grant_assume_role_with_principal_pattern(
+        self,
+        role_to_assume: aws_iam.Role,
+        principal_pattern: str,
+        account_id: Optional[str] = None,
+    ) -> aws_iam.Role:
+        """
+        Grants assume role permissions to the role of the given
+        account with the given name pattern. Default account
+        is the account of this stack.
+        """
+        # resolve the account lazily from the stack: a default evaluated
+        # in the signature would run as soon as this module is imported.
+        if account_id is None:
+            account_id = self.account
+
+        role_to_assume.assume_role_policy.add_statements(
+            aws_iam.PolicyStatement(
+                effect=aws_iam.Effect.ALLOW,
+                principals=[aws_iam.AnyPrincipal()],
+                actions=["sts:AssumeRole"],
+                conditions={
+                    "StringLike": {
+                        "aws:PrincipalArn": [
+                            f"arn:aws:iam::{account_id}:role/{principal_pattern}"
+                        ]
+                    }
+                },
+            )
+        )
+
+        return role_to_assume
diff --git a/cdk_eoapi/vpc.py b/cdk_eoapi/vpc.py
new file mode 100644
index 0000000..fdc97ff
--- /dev/null
+++ b/cdk_eoapi/vpc.py
@@ -0,0 +1,50 @@
+from aws_cdk import Stack, aws_ec2
+from constructs import Construct
+
+
+class VpcStack(Stack):
+    def __init__(
+        self, scope: Construct, id: str, nat_gateway_count: int, **kwargs
+    ) -> None:
+        super().__init__(scope, id, **kwargs)
+
+        self.vpc = aws_ec2.Vpc(
+            self,
+            "vpc",
+            subnet_configuration=[
+                aws_ec2.SubnetConfiguration(
+                    name="ingress", subnet_type=aws_ec2.SubnetType.PUBLIC, cidr_mask=24
+                ),
+                aws_ec2.SubnetConfiguration(
+                    name="application",
+                    subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_EGRESS,
+                    cidr_mask=24,
+                ),
+                aws_ec2.SubnetConfiguration(
+                    name="rds",
+                    subnet_type=aws_ec2.SubnetType.PRIVATE_ISOLATED,
+                    cidr_mask=24,
+                ),
+            ],
+            nat_gateways=nat_gateway_count,
+        )
+
+        self.vpc.add_gateway_endpoint(
+            "DynamoDbEndpoint", service=aws_ec2.GatewayVpcEndpointAwsService.DYNAMODB
+        )
+
+        self.vpc.add_interface_endpoint(
+            "SecretsManagerEndpoint",
+            service=aws_ec2.InterfaceVpcEndpointAwsService.SECRETS_MANAGER,
+        )
+
+        self.export_value(
+            self.vpc.select_subnets(subnet_type=aws_ec2.SubnetType.PUBLIC)
+            .subnets[0]
+            .subnet_id
+        )
+        self.export_value(
+            self.vpc.select_subnets(subnet_type=aws_ec2.SubnetType.PUBLIC)
+            .subnets[1]
+            .subnet_id
+        )
diff --git a/config.py b/config.py
new file mode 100644
index 0000000..6bca84d
--- /dev/null
+++ b/config.py
@@ -0,0 +1,109 @@
+from typing import Any, Dict, List, Optional, Union
+
+import pydantic
+from aws_cdk import aws_ec2
+from pydantic_core.core_schema import FieldValidationInfo
+from pydantic_settings import BaseSettings
+
+DEFAULT_PROJECT_ID = "cdk-eoapi-demo"
+DEFAULT_STAGE = "test"
+DEFAULT_NAT_GATEWAY_COUNT = 1
+
+
+class Config(BaseSettings):
+    """
+    Deployment settings for the VPC and pgSTAC infrastructure stacks.
+    Every field declares a default.
+    """
+
+    project_id: Optional[str] = pydantic.Field(
+        description="Project ID", default=DEFAULT_PROJECT_ID
+    )
+    stage: Optional[str] = pydantic.Field(
+        description="Stage of deployment", default=DEFAULT_STAGE
+    )
+    tags: Optional[Dict[str, str]] = pydantic.Field(
+        description="""Tags to apply to resources. If none provided,
+        will default to the defaults defined in `default_tags`.
+        Note that if tags are passed to the CDK CLI via `--tags`,
+        they will override any tags defined here.""",
+        default=None,
+    )
+    auth_provider_jwks_url: Optional[str] = pydantic.Field(
+        description="""Auth Provider JSON Web Key Set URL for
+        ingestion authentication. If not provided,
+        no authentication will be required.""",
+        default=None,
+    )
+    data_access_role_arn: Optional[str] = pydantic.Field(
+        description="""Role ARN for data access, that will be
+        used by the STAC ingestor for validation of assets
+        located in S3 and for the tiler application to access
+        assets located in S3. If none, the role will be
+        created at runtime with full S3 read access. If
+        provided, the existing role must be configured to
+        allow the tiler and STAC ingestor lambda roles to
+        assume it. See https://github.com/developmentseed/cdk-pgstac""",
+        default=None,
+    )
+    db_instance_type: Optional[str] = pydantic.Field(
+        description="Database instance type", default="t3.micro"
+    )
+    db_allocated_storage: Optional[int] = pydantic.Field(
+        description="Allocated storage for the database", default=5
+    )
+    public_db_subnet: Optional[bool] = pydantic.Field(
+        description="Whether to put the database in a public subnet", default=False
+    )
+    nat_gateway_count: Optional[int] = pydantic.Field(
+        description="Number of NAT gateways to create",
+        default=DEFAULT_NAT_GATEWAY_COUNT,
+    )
+    bastion_host_create_elastic_ip: Optional[bool] = pydantic.Field(
+        description="Whether to create an elastic IP for the bastion host",
+        default=False,
+    )
+    bastion_host_allow_ip_list: Optional[List[str]] = pydantic.Field(
+        description="""YAML file containing list of IP addresses to
+        allow SSH access to the bastion host""",
+        default=[],
+    )
+    bastion_host_user_data: Optional[
+        Union[Dict[str, Any], aws_ec2.UserData]
+    ] = pydantic.Field(
+        description="Path to file containing user data for the bastion host",
+        default=aws_ec2.UserData.for_linux(),
+    )
+    titiler_buckets: Optional[List[str]] = pydantic.Field(
+        description="""Path to YAML file containing list of
+        buckets to grant access to the titiler API""",
+        default=[],
+    )
+
+    @pydantic.field_validator("tags")
+    @classmethod
+    def default_tags(cls, v, info: FieldValidationInfo):
+        """Falls back to tags built from the project id and stage."""
+        # `.get`: a field that failed its own validation is absent from `info.data`
+        return v or {
+            "project_id": info.data.get("project_id"),
+            "stage": info.data.get("stage"),
+        }
+
+    @pydantic.field_validator("nat_gateway_count")
+    @classmethod
+    def validate_nat_gateway_count(cls, v, info: FieldValidationInfo):
+        """Rejects a NAT gateway count <= 0 unless the database is public."""
+        # `v` may be None (the field is Optional): only an explicit count is checked
+        if v is not None and v <= 0 and not info.data.get("public_db_subnet"):
+            raise ValueError(
+                """if the database and its associated services instances
+                are to be located in the private subnet of the VPC, NAT
+                gateways are needed to allow egress from the services
+                and therefore `nat_gateway_count` has to be > 0."""
+            )
+        return v
+
+    def build_service_name(self, service_id: str) -> str:
+        """Prefixes `service_id` with the project id and stage."""
+        return f"{self.project_id}-{self.stage}-{service_id}"
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..9227b3d
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,15 @@
+aws-cdk-lib>=2.75.0
+aws_cdk.aws_cognito_identitypool_alpha>=2.75.0a0
+cdk-pgstac==4.2.3
+constructs>=10.0.0,<11.0.0
+pydantic==2.0.2
+pydantic-settings==2.0.1
+black==22.3.0
+boto3==1.24.15
+boto3-stubs[cognito-idp,cognito-identity]
+flake8==4.0.1
+click==8.1.3
+requests==2.28.0
+python-dotenv==1.0.0
+pyyaml==6.0
+types-PyYAML==6.0.12.10
\ No newline at end of file