Skip to content

Commit

Permalink
Merge branch 'release/7.12.0' into feature/separate-sdk
Browse files Browse the repository at this point in the history
  • Loading branch information
MotwaniM committed Nov 7, 2024
2 parents 45bcc8e + 6e2bffb commit efeb90b
Show file tree
Hide file tree
Showing 20 changed files with 161 additions and 50 deletions.
14 changes: 14 additions & 0 deletions .github/workflows/release_api.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,17 @@ jobs:
TAG: ${{ github.event.release.tag_name }}
GH_TOKEN: ${{ github.token }}
run: make frontend/zip-and-release tag=$TAG

# Cleanup job: runs on the self-hosted runner after the `setup` and
# `api-release` jobs, checks out the repository and invokes the
# `api/clean-docker` make target.
# NOTE(review): per the GitHub Actions docs, a job whose `needs` dependency
# fails or is skipped is itself skipped unless the job has its own `if:`
# condition. The step-level `if: always()` below only applies once this job has
# started — confirm whether a job-level `if: always()` is wanted so cleanup
# also runs after a failed release.
cleanup:
needs:
- setup
- api-release
runs-on: self-hosted
steps:
# The checkout makes the repository (and its Makefile) available to the next step.
- name: Checkout
uses: actions/checkout@v3

# `always()` lets this step run even if the checkout step above failed.
- name: Clean Docker Context
if: always()
run: make api/clean-docker

3 changes: 1 addition & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,14 @@
export

# Versions
PYTHON_VERSION=3.10.6
PYTHON_VERSION=3.12.6
NODE_VERSION=lts/iron

# Git references
GITHUB_SHA=$$(git rev-parse HEAD)
GITHUB_REF_NAME=$$(git rev-parse --abbrev-ref HEAD)
GITHUB_SHORT_SHA=$$(git rev-parse --short HEAD)


# API Build variables
API_ACCOUNT_ECR_URI=$(AWS_ACCOUNT).dkr.ecr.$(AWS_REGION).amazonaws.com
API_PUBLIC_URI=public.ecr.aws
Expand Down
2 changes: 1 addition & 1 deletion backend/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#checkov:skip=CKV_DOCKER_9: Allow for use of apt
#checkov:skip=CKV_DOCKER_2: No need for healthcheck in container
#checkov:skip=CKV_DOCKER_3: No need for user in container
FROM python:3.10-slim
FROM python:3.12-slim

WORKDIR /app
RUN apt update
Expand Down
3 changes: 3 additions & 0 deletions backend/api/application/services/dataset_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,9 @@ def dataset_has_correct_data_types(
data_frame,
)
for column in schema.columns:
if column.name not in column_types:
continue

actual_type = column_types[column.name]
expected_type = column.data_type

Expand Down
2 changes: 2 additions & 0 deletions backend/api/domain/data_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ def is_date_type(type: str) -> bool:
def extract_athena_types(df: DataFrame) -> dict:
types = {}
for column in df.columns:
if df[column].dropna().size == 0:
continue
dtype = str(infer_dtype(df[column], skipna=True))
try:
types[column] = PANDAS_TO_ATHENA_CONVERTER[dtype].value
Expand Down
14 changes: 7 additions & 7 deletions backend/image-utils.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env bash
set -eu -o pipefail

LATEST_TAG=$(git rev-parse --short "$GITHUB_SHA")
LATEST_TAG=$(git rev-parse --short HEAD)
IGNORE_LIST_FILE=vulnerability-ignore-list.txt

RETRIES=0
Expand All @@ -19,7 +19,7 @@ function _scan_in_progress {

STATUS=$(aws ecr describe-image-scan-findings \
--region "$AWS_REGION" \
--repository-name "$IMAGE_NAME" \
--repository-name "$API_IMAGE_NAME" \
--image-id imageTag="$LATEST_TAG" \
| jq '.imageScanStatus.status' \
| jq -r .)
Expand Down Expand Up @@ -47,7 +47,7 @@ function _get_high_or_critical_vulnerabilities {
VULNS=()
while IFS='' read -r line; do VULNS+=("$line"); done < <(aws ecr describe-image-scan-findings \
--region "$AWS_REGION" \
--repository-name "$IMAGE_NAME" \
--repository-name "$API_IMAGE_NAME" \
--image-id imageTag="$1" \
| jq '.imageScanFindings.findings[] | select(.severity == "HIGH" or .severity == "CRITICAL") | (.name + "_" + .uri)' \
| jq -r .)
Expand All @@ -72,7 +72,7 @@ function get_image_sha_if_exists {
set +e
IMAGE_METADATA="$( aws ecr describe-images \
--region "$AWS_REGION" \
--repository-name=$IMAGE_NAME \
--repository-name="$API_IMAGE_NAME" \
--image-ids=imageTag="$1" 2> /dev/null )"
set -e
if [[ $? == 0 ]]; then
Expand Down Expand Up @@ -117,13 +117,13 @@ function tag_image {

MANIFEST=$(aws ecr batch-get-image \
--region "$AWS_REGION" \
--repository-name "$IMAGE_NAME" \
--repository-name "$API_IMAGE_NAME" \
--image-ids imageTag="$2" \
--query 'images[].imageManifest' --output text)

aws ecr put-image \
--region "$AWS_REGION" \
--repository-name "$IMAGE_NAME" \
--repository-name "$API_IMAGE_NAME" \
--image-tag "$1" \
--image-manifest "$MANIFEST" > /dev/null
}
Expand All @@ -133,7 +133,7 @@ function _untag_and_delete {

aws ecr batch-delete-image \
--region "$AWS_REGION" \
--repository-name "$IMAGE_NAME" \
--repository-name "$API_IMAGE_NAME" \
--image-ids imageTag="$1" > /dev/null
}

Expand Down
6 changes: 3 additions & 3 deletions backend/rapid/rapid.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from datetime import datetime
from typing import Dict, Optional
from io import StringIO

import pandas as pd

Expand Down Expand Up @@ -139,12 +140,11 @@ def download_dataframe(
data=json.dumps(query.dict(exclude_none=True)),
timeout=TIMEOUT_PERIOD,
)
data = json.loads(response.content.decode("utf-8"))
if response.status_code == 200:
return pd.read_json(json.dumps(data), orient="index")
return pd.read_json(StringIO(response.content.decode("utf-8")), orient="index")

raise DatasetNotFoundException(
f"Could not find dataset, {layer}/{domain}/{dataset} to download", data
f"Could not find dataset, {layer}/{domain}/{dataset} to download", response.json()
)

def upload_dataframe(
Expand Down
1 change: 0 additions & 1 deletion backend/requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,5 @@ mock
pytest
pytest-cov
pytest-order
python-dotenv

-r ./requirements.txt
1 change: 1 addition & 0 deletions backend/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ pyarrow
pyjwt
pydantic>=2.0.0
pydantic[email]
python-dotenv
python-multipart
uvicorn
requests
Expand Down
2 changes: 1 addition & 1 deletion backend/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import os

TEST_SDK_VERSION = os.getenv("TEST_SDK_VERSION")
version = "0.1.8"
version = "0.1.9"
setup(
name="rapid-sdk",
version=version if TEST_SDK_VERSION is None else f"{version}.{TEST_SDK_VERSION}",
Expand Down
29 changes: 21 additions & 8 deletions backend/test/api/application/services/test_dataset_validation.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import re
from typing import List

import numpy as np
import pandas as pd
import pytest

Expand Down Expand Up @@ -468,6 +469,8 @@ def test_return_error_message_when_not_correct_datatypes(self):
"col3": [1, 5, True],
"col4": [1.5, 2.5, "A"],
"col5": ["2021-01-01", "2021-05-01", 1000],
"col6": [None, None, None],
"col7": [np.nan, np.nan, np.nan]
}
)
schema = Schema(
Expand Down Expand Up @@ -503,17 +506,27 @@ def test_return_error_message_when_not_correct_datatypes(self):
data_type="date",
allow_null=False,
),
Column(
name="col6",
partition_index=None,
data_type="string",
allow_null=True,
),
Column(
name="col7",
partition_index=None,
data_type="string",
allow_null=True,
),
],
)

try:
dataset_has_correct_data_types(df, schema)
except DatasetValidationError as error:
assert error.message == [
"Column [col2] has an incorrect data type. Expected boolean, received string",
"Column [col3] has an incorrect data type. Expected int, received string",
"Column [col4] has an incorrect data type. Expected double, received string",
]
data_frame, error_list = dataset_has_correct_data_types(df, schema)
assert error_list == [
"Column [col2] has an incorrect data type. Expected boolean, received string",
"Column [col3] has an incorrect data type. Expected int, received string",
"Column [col4] has an incorrect data type. Expected bigint, received string",
]

def test_return_error_message_when_dataset_has_illegal_chars_in_partition_columns(
self,
Expand Down
11 changes: 11 additions & 0 deletions backend/test/rapid/items/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@
dataset="rapid_sdk",
sensitivity=SensitivityLevel.PUBLIC,
owners=[Owner(name="Test", email="[email protected]")],
description="test",
update_behaviour="OVERWRITE",
is_latest_version=True,
)


Expand All @@ -68,13 +71,19 @@ def test_create_schema_metadata_from_dict(self):
"dataset": "rapid_sdk",
"sensitivity": SensitivityLevel.PUBLIC,
"owners": [{"name": "Test", "email": "[email protected]"}],
"description": "test",
"update_behaviour": "OVERWRITE",
"is_latest_version": True,
}

schema_metadata = SchemaMetadata(**_schema_metadata)
assert schema_metadata.domain == "test"
assert schema_metadata.dataset == "rapid_sdk"
assert schema_metadata.sensitivity == "PUBLIC"
assert schema_metadata.owners == [Owner(name="Test", email="[email protected]")]
assert schema_metadata.description == "test"
assert schema_metadata.update_behaviour == "OVERWRITE"
assert schema_metadata.is_latest_version == True


class TestColumn:
Expand Down Expand Up @@ -273,6 +282,8 @@ def test_schema_returns_correct_dictionary(self):
"version": None,
"key_value_tags": {},
"key_only_tags": [],
"description": "test",
"update_behaviour": "OVERWRITE",
"is_latest_version": True,
},
"columns": [
Expand Down
65 changes: 55 additions & 10 deletions backend/vulnerability-ignore-list.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,56 @@
CVE-2019-19814 https://security-tracker.debian.org/tracker/CVE-2019-19814
CVE-2021-39686 https://security-tracker.debian.org/tracker/CVE-2021-39686
CVE-2013-7445 https://security-tracker.debian.org/tracker/CVE-2013-7445
CVE-2022-24765 https://security-tracker.debian.org/tracker/CVE-2022-24765
CVE-2023-6879 https://security-tracker.debian.org/tracker/CVE-2023-6879
CVE-2023-45853 https://security-tracker.debian.org/tracker/CVE-2023-45853
CVE-2023-52425 https://security-tracker.debian.org/tracker/CVE-2023-52425
CVE-2023-49462 https://security-tracker.debian.org/tracker/CVE-2023-49462
CVE-2024-46724 https://security-tracker.debian.org/tracker/CVE-2024-46724
CVE-2024-46738 https://security-tracker.debian.org/tracker/CVE-2024-46738
CVE-2024-46756 https://security-tracker.debian.org/tracker/CVE-2024-46756
CVE-2024-46731 https://security-tracker.debian.org/tracker/CVE-2024-46731
CVE-2024-44987 https://security-tracker.debian.org/tracker/CVE-2024-44987
CVE-2024-26913 https://security-tracker.debian.org/tracker/CVE-2024-26913
CVE-2024-46725 https://security-tracker.debian.org/tracker/CVE-2024-46725
CVE-2024-46759 https://security-tracker.debian.org/tracker/CVE-2024-46759
CVE-2024-44998 https://security-tracker.debian.org/tracker/CVE-2024-44998
CVE-2024-26952 https://security-tracker.debian.org/tracker/CVE-2024-26952
CVE-2024-38630 https://security-tracker.debian.org/tracker/CVE-2024-38630
CVE-2024-44974 https://security-tracker.debian.org/tracker/CVE-2024-44974
CVE-2024-41061 https://security-tracker.debian.org/tracker/CVE-2024-41061
CVE-2021-3847 https://security-tracker.debian.org/tracker/CVE-2021-3847
CVE-2022-27404 https://security-tracker.debian.org/tracker/CVE-2022-27404
CVE-2019-8457 https://security-tracker.debian.org/tracker/CVE-2019-8457
CVE-2022-1679 https://security-tracker.debian.org/tracker/CVE-2022-1679
CVE-2022-1652 https://security-tracker.debian.org/tracker/CVE-2022-1652
CVE-2019-15794 https://security-tracker.debian.org/tracker/CVE-2019-15794
CVE-2022-29187 https://security-tracker.debian.org/tracker/CVE-2022-29187
CVE-2024-46740 https://security-tracker.debian.org/tracker/CVE-2024-46740
CVE-2024-39479 https://security-tracker.debian.org/tracker/CVE-2024-39479
CVE-2024-41071 https://security-tracker.debian.org/tracker/CVE-2024-41071
CVE-2024-38570 https://security-tracker.debian.org/tracker/CVE-2024-38570
CVE-2019-19449 https://security-tracker.debian.org/tracker/CVE-2019-19449
CVE-2024-21803 https://security-tracker.debian.org/tracker/CVE-2024-21803
CVE-2024-46674 https://security-tracker.debian.org/tracker/CVE-2024-46674
CVE-2024-46673 https://security-tracker.debian.org/tracker/CVE-2024-46673
CVE-2024-46798 https://security-tracker.debian.org/tracker/CVE-2024-46798
CVE-2024-46782 https://security-tracker.debian.org/tracker/CVE-2024-46782
CVE-2024-46722 https://security-tracker.debian.org/tracker/CVE-2024-46722
CVE-2023-52452 https://security-tracker.debian.org/tracker/CVE-2023-52452
CVE-2024-42162 https://security-tracker.debian.org/tracker/CVE-2024-42162
CVE-2024-26930 https://security-tracker.debian.org/tracker/CVE-2024-26930
CVE-2024-46743 https://security-tracker.debian.org/tracker/CVE-2024-46743
CVE-2023-52827 https://security-tracker.debian.org/tracker/CVE-2023-52827
CVE-2024-45026 https://security-tracker.debian.org/tracker/CVE-2024-45026
CVE-2024-44941 https://security-tracker.debian.org/tracker/CVE-2024-44941
CVE-2024-44940 https://security-tracker.debian.org/tracker/CVE-2024-44940
CVE-2024-44942 https://security-tracker.debian.org/tracker/CVE-2024-44942
CVE-2024-46757 https://security-tracker.debian.org/tracker/CVE-2024-46757
CVE-2024-44999 https://security-tracker.debian.org/tracker/CVE-2024-44999
CVE-2024-46747 https://security-tracker.debian.org/tracker/CVE-2024-46747
CVE-2024-46723 https://security-tracker.debian.org/tracker/CVE-2024-46723
CVE-2021-3864 https://security-tracker.debian.org/tracker/CVE-2021-3864
CVE-2024-44986 https://security-tracker.debian.org/tracker/CVE-2024-44986
CVE-2024-46746 https://security-tracker.debian.org/tracker/CVE-2024-46746
CVE-2024-42228 https://security-tracker.debian.org/tracker/CVE-2024-42228
CVE-2013-7445 https://security-tracker.debian.org/tracker/CVE-2013-7445
CVE-2024-46800 https://security-tracker.debian.org/tracker/CVE-2024-46800
CVE-2019-19814 https://security-tracker.debian.org/tracker/CVE-2019-19814
CVE-2024-46758 https://security-tracker.debian.org/tracker/CVE-2024-46758
CVE-2023-2953 https://security-tracker.debian.org/tracker/CVE-2023-2953
CVE-2023-31484 https://security-tracker.debian.org/tracker/CVE-2023-31484
CVE-2023-7104 https://security-tracker.debian.org/tracker/CVE-2023-7104
CVE-2024-7006 https://security-tracker.debian.org/tracker/CVE-2024-7006
CVE-2023-52356 https://security-tracker.debian.org/tracker/CVE-2023-52356
CVE-2023-52355 https://security-tracker.debian.org/tracker/CVE-2023-52355
8 changes: 8 additions & 0 deletions docs/changelog/api.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# API Changelog

## v7.11.0 - _2024-09-26_

### Fixes

- Removed dependency on batect
- Removed the need to copy the access token when running the UI locally
- Refactored the e2e tests

## v7.10.0 - _2024-03-21_

### Fixes
Expand Down
6 changes: 6 additions & 0 deletions docs/changelog/sdk.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# SDK Changelog

## v0.1.9 - _2024-09-12_

### Features

- Expanded rAPId SDK metadata to include `update_behaviour`, `is_latest_version` and `description`.

## v0.1.8 - _2024-03-21_

### Features
Expand Down
6 changes: 0 additions & 6 deletions docs/contributing.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,12 +72,6 @@ FRONTEND_CLIENT_SECRET=

Running `make frontend/run-dev` will then launch the Frontend in development mode with hot reloading.

### RAT Token

The Frontend looks for a cookie with the name `rat` (rAPId Access Token), to authenticate with the API. This is a Cognito token that is generated by the Frontend and passed to the API in the `Authorization` header. The API then validates the token and extracts the user information from it.

For the authentication to work when running the local Frontend, you need to log in to your deployed version of rAPId and copy the value of the `rat` cookie. Then navigate back to your locally running version of the Frontend and create a new `rat` cookie with the value you copied.

## Testing

Every rAPId module other than the infrastructure has complementary tests.
Expand Down
1 change: 1 addition & 0 deletions infrastructure/modules/rapid/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ module "ui" {
load_balancer_dns = module.app_cluster.load_balancer_dns
route_53_validation_record_fqdns = module.app_cluster.route_53_validation_record_fqdns
geo_restriction_locations = var.geo_restriction_locations
sql_injection_protection = var.sql_injection_protection
}

resource "aws_s3_bucket" "this" {
Expand Down
6 changes: 6 additions & 0 deletions infrastructure/modules/rapid/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,12 @@ variable "geo_restriction_locations" {
default = ["GB"]
}

# Boolean toggle for the WAF SQL injection protection rule; enabled by default.
# The same commit passes this value to the `ui` module in
# infrastructure/modules/rapid/main.tf as `sql_injection_protection`.
variable "sql_injection_protection" {
description = "Whether to add SQL injection protection rule to WAF. Setting the variable to false may result in reduced application protection."
type = bool
default = true
}

variable "password_policy" {
type = object({
minimum_length = number
Expand Down
Loading

0 comments on commit efeb90b

Please sign in to comment.