diff --git a/api/api/controller/datasets.py b/api/api/controller/datasets.py index d9872aa..cf53a77 100644 --- a/api/api/controller/datasets.py +++ b/api/api/controller/datasets.py @@ -90,7 +90,7 @@ async def list_all_datasets( | Parameters | Required| Usage | Example values | Definition | |---------------|---------|-----------------------------------------|------------------------------------------------------------------------------------------------------- |-----------------------| | enriched | False | Boolean Query parameter | True | enriches the metadata | - | query | False | JSON Request Body | Consult the [docs](https://github.com/no10ds/rapid-api/blob/main/docs/guides/usage/usage.md#examples-2)| the filtering query | + | query | False | JSON Request Body | Consult the [docs](https://rapid.readthedocs.io/en/latest/api/routes/dataset/#filtering-query) | the filtering query | ### Accepted permissions @@ -505,7 +505,7 @@ async def query_dataset( | `domain` | True | URL parameter | `space` | domain of the dataset | | `dataset` | True | URL parameter | `rocket_launches` | dataset title | | `version` | False | Query parameter | '3' | dataset version | - | `query` | False | JSON Request Body | Consult the [docs](https://github.com/no10ds/rapid-api/blob/main/docs/guides/usage/usage.md#how-to-construct-a-query-object)| the query object | + | `query` | False | JSON Request Body | Consult the [docs](https://rapid.readthedocs.io/en/latest/api/query/) | the query object | #### Layer @@ -616,7 +616,7 @@ async def query_large_dataset( | `domain` | True | URL parameter | `space` | domain of the dataset | | `dataset` | True | URL parameter | `rocket_launches` | dataset title | | `version` | False | Query parameter | '3' | dataset version | - | `query` | False | JSON Request Body | Consult the [docs](https://github.com/no10ds/rapid-api/blob/main/docs/guides/usage/usage.md#how-to-construct-a-query-object)| the query object | + | `query` | False | JSON Request Body | Consult 
the [docs](https://rapid.readthedocs.io/en/latest/api/query/) | the query object | #### Layer diff --git a/api/api/controller/schema.py b/api/api/controller/schema.py index d241990..783e056 100644 --- a/api/api/controller/schema.py +++ b/api/api/controller/schema.py @@ -51,7 +51,7 @@ async def generate_schema( In order to upload the dataset for the first time, you need to define its schema. This endpoint is provided for your convenience to generate a schema based on an existing dataset. Alternatively you can consult - the [schema writing guide](https://github.com/no10ds/rapid-api/blob/main/docs/guides/usage/schema_creation.md) if you would like to create the schema yourself. You can then use the + the [schema writing guide](https://rapid.readthedocs.io/en/latest/api/schema/) if you would like to create the schema yourself. You can then use the output of this endpoint in the Schema Upload endpoint. ⚠️ WARNING: @@ -109,7 +109,7 @@ async def upload_schema(schema: Schema): When you have a schema definition you can use this endpoint to upload it. This will allow you to subsequently upload datasets that match the schema. If you do not yet have a schema definition, you can craft this yourself (see - the [schema writing guide](https://github.com/no10ds/rapid-api/blob/main/docs/guides/usage/schema_creation.md)) or use the Schema Generation endpoint (see above). + the [schema writing guide](https://rapid.readthedocs.io/en/latest/api/schema/)) or use the Schema Generation endpoint (see above). ### Inputs @@ -159,7 +159,7 @@ async def update_schema(schema: Schema): This endpoint is for uploading an updated schema definition. This will allow you to subsequently upload datasets that match the updated schema. To create a schema definition (see - the [schema writing guide](https://github.com/no10ds/rapid-api/blob/main/docs/guides/usage/schema_creation.md)) or use the Schema Generation endpoint (see above). 
+ the [schema writing guide](https://rapid.readthedocs.io/en/latest/api/schema/)) or use the Schema Generation endpoint (see above). ### Inputs diff --git a/api/api/entry.py b/api/api/entry.py index 12489d5..bbc9d4f 100644 --- a/api/api/entry.py +++ b/api/api/entry.py @@ -121,7 +121,7 @@ def info(): "url": PROJECT_URL, "contact": PROJECT_CONTACT, "organisation": PROJECT_ORGANISATION, - "documentation-url": "https://github.com/no10ds/rapid-api", + "documentation-url": "https://rapid.readthedocs.io/en/latest/", }, } ], diff --git a/docs/api/query.md b/docs/api/query.md index c76d8e6..4d07ec0 100644 --- a/docs/api/query.md +++ b/docs/api/query.md @@ -4,29 +4,32 @@ Data can be queried provided data has been uploaded at some point in the past. There are six values you can customise: + + - `select_columns` - - Which column(s) you want to select - - List of strings - - Can contain aggregation functions e.g.: `"avg(col1)"`, `"sum(col2)"` - - Can contain renaming of columns e.g.: `"col1 AS custom_name"` + - Which column(s) you want to select + - List of strings + - Can contain aggregation functions e.g.: `"avg(col1)"`, `"sum(col2)"` + - Can contain renaming of columns e.g.: `"col1 AS custom_name"` - `filter` - - How to filter the data - - This is provided as a raw SQL string - - Omit the `WHERE` keyword + - How to filter the data + - This is provided as a raw SQL string + - Omit the `WHERE` keyword - `group_by_columns` - - Which columns to group by - - List of column names as strings + - Which columns to group by + - List of column names as strings - `aggregation_conditions` - - What conditions you want to apply to aggregated values - - This is provided as a raw SQL string - - Omit the `HAVING` keyword + - What conditions you want to apply to aggregated values + - This is provided as a raw SQL string + - Omit the `HAVING` keyword - `order_by_columns` - - By which column(s) to order the data - - List of strings - - Defaults to ascending (`ASC`) if not provided -- `limit` - - 
How many rows to limit the results to - - String of an integer + - By which column(s) to order the data + - List of strings + - Defaults to ascending (`ASC`) if not provided +- `limit` - How many rows to limit the results to - String of an integer + + + For example: diff --git a/docs/api/routes/dataset.md b/docs/api/routes/dataset.md index b594863..f1b1d8a 100644 --- a/docs/api/routes/dataset.md +++ b/docs/api/routes/dataset.md @@ -122,10 +122,57 @@ None ### Inputs -| Parameters | Required | Usage | Example values | Definition | -| ---------- | -------- | ----------------------- | ------------------------------------------------------------------------------------------------------- | --------------------- | -| enriched | False | Boolean Query parameter | True | enriches the metadata | -| query | False | JSON Request Body | Consult the [docs](https://github.com/no10ds/rapid-api/blob/main/docs/guides/usage/usage.md#examples-2) | the filtering query | +| Parameters | Required | Usage | Example values | Definition | +| ---------- | -------- | ----------------------- | ---------------------------------------------------------------------------------------------- | --------------------- | +| enriched | False | Boolean Query parameter | True | enriches the metadata | +| query | False | JSON Request Body | Consult the [docs](https://rapid.readthedocs.io/en/latest/api/routes/dataset/#filtering-query) | the filtering query | + +#### Filtering Query + +**Example 1 - Filtering by tags** + +Here we retrieve all datasets that have a tag with key `tag1` with any value and `tag2` with value `value2`. 
+ +```json +{ + "key_value_tags": { + "tag1": null, + "tag2": "value2" + } +} +``` + +**Example 2 - Filtering by sensitivity** + +```json +{ + "sensitivity": "PUBLIC" +} +``` + +**Example 3 - Filtering by tags and sensitivity** + +```json +{ + "sensitivity": "PUBLIC", + "key_value_tags": { + "tag1": null, + "tag2": "value2" + } +} +``` + +**Example 4 - Filtering by key value tags and key only tags** + +```json +{ + "sensitivity": "PUBLIC", + "key_value_tags": { + "tag2": "value2" + }, + "key_only_tags": ["tag1"] +} +``` ### Outputs @@ -208,13 +255,13 @@ You will need `READ` permission appropriate to the dataset sensitivity level, e. ### Inputs -| Parameters | Required | Usage | Example values | Definition | -| ---------- | -------- | ----------------- | ---------------------------------------------------------------------------------------------------------------------------- | --------------------- | -| `layer` | True | URL parameter | `raw` | layer of the dataset | -| `domain` | True | URL parameter | `space` | domain of the dataset | -| `dataset` | True | URL parameter | `rocket_launches` | dataset title | -| `version` | False | Query parameter | '3' | dataset version | -| `query` | False | JSON Request Body | Consult the [docs](https://github.com/no10ds/rapid-api/blob/main/docs/guides/usage/usage.md#how-to-construct-a-query-object) | the query object | +| Parameters | Required | Usage | Example values | Definition | +| ---------- | -------- | ----------------- | --------------------------------------------------------------------- | --------------------- | +| `layer` | True | URL parameter | `raw` | layer of the dataset | +| `domain` | True | URL parameter | `space` | domain of the dataset | +| `dataset` | True | URL parameter | `rocket_launches` | dataset title | +| `version` | False | Query parameter | '3' | dataset version | +| `query` | False | JSON Request Body | Consult the [docs](https://rapid.readthedocs.io/en/latest/api/query/) | the query object | ### 
Outputs @@ -258,13 +305,13 @@ You will need a `READ` permission appropriate to the dataset sensitivity level, ### Inputs -| Parameters | Required | Usage | Example values | Definition | -| ---------- | -------- | ----------------- | ---------------------------------------------------------------------------------------------------------------------------- | --------------------- | -| `layer` | True | URL parameter | `raw` | layer of the dataset | -| `domain` | True | URL parameter | `space` | domain of the dataset | -| `dataset` | True | URL parameter | `rocket_launches` | dataset title | -| `version` | False | Query parameter | '3' | dataset version | -| `query` | False | JSON Request Body | Consult the [docs](https://github.com/no10ds/rapid-api/blob/main/docs/guides/usage/usage.md#how-to-construct-a-query-object) | the query object | +| Parameters | Required | Usage | Example values | Definition | +| ---------- | -------- | ----------------- | --------------------------------------------------------------------- | --------------------- | +| `layer` | True | URL parameter | `raw` | layer of the dataset | +| `domain` | True | URL parameter | `space` | domain of the dataset | +| `dataset` | True | URL parameter | `rocket_launches` | dataset title | +| `version` | False | Query parameter | '3' | dataset version | +| `query` | False | JSON Request Body | Consult the [docs](https://rapid.readthedocs.io/en/latest/api/query/) | the query object | ### Outputs diff --git a/docs/changelog.md b/docs/changelog.md index 856c907..457c946 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,5 +1,20 @@ # Changelog +## v7.0.7 / v0.1.5 (sdk) - _2023-11-07_ + +### Fixes + +- Issue within the sdk `upload_and_create_dataset` function where schema metadata wasn't being correctly overridden. +- Hitting maximum security group rules for the load balancer. +- Documentation improvements and removes any references to the old deprecated repositories. 
+ ### Closes relevant GitHub issues + +- https://github.com/no10ds/rapid/issues/50 +- https://github.com/no10ds/rapid/issues/59 +- https://github.com/no10ds/rapid/issues/54 +- https://github.com/no10ds/rapid/issues/51 + ## v7.0.6 / v0.1.4 (sdk) - _2023-10-18_ ### Features @@ -70,7 +85,8 @@ - See the [migration doc](migration.md) for details on how to migrate to v7 from v6. -[Unreleased changes]: https://github.com/no10ds/rapid/compare/v7.0.6...HEAD +[Unreleased changes]: https://github.com/no10ds/rapid/compare/v7.0.7...HEAD +[v7.0.7 / v0.1.5 (sdk)]: https://github.com/no10ds/rapid/compare/v7.0.6...v7.0.7 [v7.0.6 / v0.1.4 (sdk)]: https://github.com/no10ds/rapid/v7.0.5...v7.0.6 [v7.0.5 / v0.1.3 (sdk)]: https://github.com/no10ds/rapid/v7.0.4...v7.0.5 [v7.0.4 / v0.1.2 (sdk)]: https://github.com/no10ds/rapid/v7.0.3...v7.0.4 diff --git a/docs/sdk/useful_patterns.md b/docs/sdk/useful_patterns.md index e417137..0387060 100644 --- a/docs/sdk/useful_patterns.md +++ b/docs/sdk/useful_patterns.md @@ -7,7 +7,7 @@ Below is a simple example for uploading a Pandas DataFrame to the API. 
```python import pandas as pd from rapid import Rapid -from rapid.patterns import data +from rapid.patterns import dataset from rapid.items.schema import SchemaMetadata, SensitivityLevel, Owner from rapid.exceptions import DataFrameUploadValidationException @@ -25,7 +25,7 @@ metadata = SchemaMetadata( ) try: - data.upload_and_create_dataset( + dataset.upload_and_create_dataset( rapid=rapid, df=df, metadata=metadata, upgrade_schema_on_fail=False ) except DataFrameUploadValidationException: @@ -39,7 +39,7 @@ Now going forward say for instance we now expect that for column c we can expect ```python import pandas as pd from rapid import Rapid -from rapid.patterns import data +from rapid.patterns import dataset from rapid.items.schema import SchemaMetadata, SensitivityLevel, Owner, Column from rapid.exceptions import ColumnNotDifferentException @@ -57,7 +57,7 @@ metadata = SchemaMetadata( ) try: - data.update_schema_to_dataframe( + dataset.update_schema_to_dataframe( rapid=rapid, df=df, metadata=metadata, diff --git a/infrastructure/modules/app-cluster/load_balancer.tf b/infrastructure/modules/app-cluster/load_balancer.tf index 849cc29..365c30f 100644 --- a/infrastructure/modules/app-cluster/load_balancer.tf +++ b/infrastructure/modules/app-cluster/load_balancer.tf @@ -5,7 +5,7 @@ resource "aws_alb" "application_load_balancer" { internal = false load_balancer_type = "application" subnets = var.public_subnet_ids_list - security_groups = [aws_security_group.load_balancer_security_group.id] + security_groups = [aws_security_group.load_balancer_security_group_http.id, aws_security_group.load_balancer_security_group_https.id] drop_invalid_header_fields = true enable_deletion_protection = true @@ -64,7 +64,7 @@ POLICY data "aws_ec2_managed_prefix_list" "cloudwatch" { name = "com.amazonaws.global.cloudfront.origin-facing" } -resource "aws_security_group" "load_balancer_security_group" { +resource "aws_security_group" "load_balancer_security_group_http" { # 
checkov:skip=CKV_AWS_260: Limits by prefix list ID's vpc_id = var.vpc_id description = "ALB Security Group" @@ -75,13 +75,6 @@ resource "aws_security_group" "load_balancer_security_group" { prefix_list_ids = [data.aws_ec2_managed_prefix_list.cloudwatch.id] description = "Allow HTTP ingress" } - ingress { - from_port = 443 - to_port = 443 - protocol = "tcp" - prefix_list_ids = [data.aws_ec2_managed_prefix_list.cloudwatch.id] - description = "Allow HTTPS ingress" - } egress { from_port = 0 to_port = 0 @@ -96,6 +89,23 @@ resource "aws_security_group" "load_balancer_security_group" { create_before_destroy = true } } +resource "aws_security_group" "load_balancer_security_group_https" { + # checkov:skip=CKV_AWS_260: Limits by prefix list ID's + vpc_id = var.vpc_id + description = "ALB Security Group" + ingress { + from_port = 443 + to_port = 443 + protocol = "tcp" + prefix_list_ids = [data.aws_ec2_managed_prefix_list.cloudwatch.id] + description = "Allow HTTPS ingress" + } + tags = var.tags + + lifecycle { + create_before_destroy = true + } +} resource "aws_lb_target_group" "target_group" { name = "${var.resource-name-prefix}-tg" diff --git a/infrastructure/modules/app-cluster/main.tf b/infrastructure/modules/app-cluster/main.tf index 998e26d..2d8e424 100644 --- a/infrastructure/modules/app-cluster/main.tf +++ b/infrastructure/modules/app-cluster/main.tf @@ -356,7 +356,8 @@ resource "aws_ecs_service" "aws-ecs-service" { assign_public_ip = false security_groups = [ aws_security_group.service_security_group.id, - aws_security_group.load_balancer_security_group.id + aws_security_group.load_balancer_security_group_http.id, + aws_security_group.load_balancer_security_group_https.id ] } @@ -376,7 +377,7 @@ resource "aws_security_group" "service_security_group" { from_port = 0 to_port = 0 protocol = "-1" - security_groups = [aws_security_group.load_balancer_security_group.id] + security_groups = [aws_security_group.load_balancer_security_group_http.id, 
aws_security_group.load_balancer_security_group_https.id] description = "Allow traffic from load balancer" } diff --git a/infrastructure/modules/rapid/variables.tf b/infrastructure/modules/rapid/variables.tf index c4fc13a..c982dae 100644 --- a/infrastructure/modules/rapid/variables.tf +++ b/infrastructure/modules/rapid/variables.tf @@ -13,13 +13,13 @@ variable "app-replica-count-max" { variable "application_version" { type = string description = "The version number for the application image (e.g.: v1.0.4, v1.0.x-latest, etc.)" - default = "v7.0.6" + default = "v7.0.7" } variable "ui_version" { type = string description = "The version number for the static ui (e.g.: v1.0.0, etc.)" - default = "v7.0.6" + default = "v7.0.7" } variable "catalog_disabled" { diff --git a/sdk/rapid/items/schema.py b/sdk/rapid/items/schema.py index e98a20e..c558ca4 100644 --- a/sdk/rapid/items/schema.py +++ b/sdk/rapid/items/schema.py @@ -26,8 +26,8 @@ class SchemaMetadata(BaseModel): sensitivity: SensitivityLevel owners: List[Owner] version: Optional[int] = None - key_value_tags: Optional[Dict[str, str]] = None - key_only_tags: Optional[List[str]] = None + key_value_tags: Optional[Dict[str, str]] = {} + key_only_tags: Optional[List[str]] = [] class Config: use_enum_values = True diff --git a/sdk/rapid/patterns/dataset.py b/sdk/rapid/patterns/dataset.py index 8acf708..386a962 100644 --- a/sdk/rapid/patterns/dataset.py +++ b/sdk/rapid/patterns/dataset.py @@ -30,6 +30,7 @@ def upload_and_create_dataset( schema = rapid.generate_schema( df, metadata.layer, metadata.domain, metadata.dataset, metadata.sensitivity ) + schema.metadata = metadata rapid.create_schema(schema) rapid.upload_dataframe( metadata.layer, metadata.domain, metadata.dataset, df, wait_to_complete=True diff --git a/sdk/setup.py b/sdk/setup.py index 83d8f7a..2d3f388 100644 --- a/sdk/setup.py +++ b/sdk/setup.py @@ -2,7 +2,7 @@ setup( name="rapid-sdk", - version="0.1.4", + version="0.1.5", description="A python sdk for the rAPId 
API", url="https://github.com/no10ds/rapid-sdk", author="Lewis Card", diff --git a/sdk/tests/test_items/test_schema.py b/sdk/tests/test_items/test_schema.py index 483c60f..eb08880 100644 --- a/sdk/tests/test_items/test_schema.py +++ b/sdk/tests/test_items/test_schema.py @@ -263,8 +263,8 @@ def test_schema_returns_correct_dictionary(self): "sensitivity": "PUBLIC", "owners": [{"name": "Test", "email": "test@email.com"}], "version": None, - "key_value_tags": None, - "key_only_tags": None, + "key_value_tags": {}, + "key_only_tags": [], }, "columns": [ { diff --git a/ui/src/components/SchemaCreate.tsx b/ui/src/components/SchemaCreate.tsx index 610b410..79bfe1f 100644 --- a/ui/src/components/SchemaCreate.tsx +++ b/ui/src/components/SchemaCreate.tsx @@ -234,10 +234,7 @@ function CreateSchema({ Consult the{' '} - + schema writing guide {' '} for further information. diff --git a/ui/src/pages/data/download/[layer]/[domain]/[dataset].tsx b/ui/src/pages/data/download/[layer]/[domain]/[dataset].tsx index 91e83a7..da5335c 100644 --- a/ui/src/pages/data/download/[layer]/[domain]/[dataset].tsx +++ b/ui/src/pages/data/download/[layer]/[domain]/[dataset].tsx @@ -179,10 +179,7 @@ function DownloadDataset() { For further information on writing queries consult the{' '} - + query writing guide diff --git a/ui/src/pages/data/download/file.tsx b/ui/src/pages/data/download/file.tsx index bd0fce3..7dc9ded 100644 --- a/ui/src/pages/data/download/file.tsx +++ b/ui/src/pages/data/download/file.tsx @@ -82,10 +82,7 @@ function FilePage() { For further information on writing queries consult the{' '} - + query writing guide diff --git a/ui/src/pages/subject/create/index.tsx b/ui/src/pages/subject/create/index.tsx index dad40d3..2321b61 100644 --- a/ui/src/pages/subject/create/index.tsx +++ b/ui/src/pages/subject/create/index.tsx @@ -118,7 +118,7 @@ function CreateUserPage() { Create a new user or client using the rAPId instance. 
Simply fill out the form with the required information, which can be found in more detail at the link{' '} - + provided.