Skip to content

Commit

Permalink
Merge pull request #41 from no10ds/feature/fix-sdk-dataset-patterns
Browse files Browse the repository at this point in the history
Fix sdk dataset patterns
  • Loading branch information
lcardno10 authored Oct 11, 2023
2 parents 925c6f9 + f1ff07c commit a5df376
Show file tree
Hide file tree
Showing 11 changed files with 286 additions and 238 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ sdk-setup: ## Setup Python required for the sdk
# SDK Testing --------------------
##
sdk-test: ## Run sdk unit tests
@cd sdk/; pytest -vv -s
@cd sdk/; . .venv/bin/activate && pytest -vv -s

# SDK Release --------------------
##
Expand Down
7 changes: 6 additions & 1 deletion docs/changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Changelog

## v7.0.5 / v0.1.3 (sdk) - _2023-09-20_

### Fixes

- Fix the behaviour of the dataset pattern functions in the SDK.

## v7.0.4 / v0.1.2 (sdk) - _2023-09-20_

### Features
Expand All @@ -18,7 +24,6 @@

- Fixes an issue where permissions were not being read correctly, causing API functionality to fail


## v7.0.2 / v0.1.2 (sdk) - _2023-09-14_

### Fixes
Expand Down
2 changes: 1 addition & 1 deletion docs/sdk/api/patterns/data.md
Original file line number Diff line number Diff line change
@@ -1 +1 @@
::: rapid.patterns.data
::: rapid.patterns.dataset
4 changes: 2 additions & 2 deletions docs/sdk/useful_patterns.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ metadata = SchemaMetadata(
)

try:
data.upload_and_create_dataframe(
data.upload_and_create_dataset(
rapid=rapid, df=df, metadata=metadata, upgrade_schema_on_fail=False
)
except DataFrameUploadValidationException:
Expand Down Expand Up @@ -57,7 +57,7 @@ metadata = SchemaMetadata(
)

try:
data.update_schema_dataframe(
data.update_schema_to_dataframe(
rapid=rapid,
df=df,
metadata=metadata,
Expand Down
4 changes: 2 additions & 2 deletions infrastructure/modules/rapid/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@ variable "app-replica-count-max" {
variable "application_version" {
type = string
description = "The version number for the application image (e.g.: v1.0.4, v1.0.x-latest, etc.)"
default = "v7.0.4"
default = "v7.0.5"
}

variable "ui_version" {
type = string
description = "The version number for the static ui (e.g.: v1.0.0, etc.)"
default = "v7.0.4"
default = "v7.0.5"
}

variable "catalog_disabled" {
Expand Down
68 changes: 0 additions & 68 deletions sdk/rapid/patterns/data.py

This file was deleted.

68 changes: 68 additions & 0 deletions sdk/rapid/patterns/dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from pandas import DataFrame
from rapid.exceptions import (
DataFrameUploadValidationException,
DatasetNotFoundException,
)
from rapid.items.schema import Schema, SchemaMetadata
from rapid import Rapid


def upload_and_create_dataset(
    rapid: Rapid, metadata: SchemaMetadata, df: DataFrame, upgrade_schema_on_fail=False
):
    """
    Uploads a dataframe to a dataset in the API, creating schema first if necessary.

    The upload is attempted first. If the dataset does not exist, a schema is
    generated from the DataFrame, created in the API, and the upload is retried.
    If the upload fails schema validation, the existing schema is updated to
    match the DataFrame and the upload is retried, but only when
    ``upgrade_schema_on_fail`` is True.

    Args:
        rapid (Rapid): An instance of the rAPId SDK's main class.
        metadata (SchemaMetadata): The metadata for the schema to be created and the dataset to upload the DataFrame to.
        df (DataFrame): The pandas DataFrame to generate a schema for and upload to the dataset.
        upgrade_schema_on_fail (bool, optional): Whether to upgrade the schema if the DataFrame's schema is incorrect. Defaults to False.

    Raises:
        rapid.exceptions.DataFrameUploadValidationException: If the DataFrame's schema is incorrect and upgrade_schema_on_fail is False.
    """

    def _upload():
        # Single definition of the upload call shared by the first attempt and
        # both retry paths; every attempt waits for the job to complete.
        rapid.upload_dataframe(
            metadata.layer, metadata.domain, metadata.dataset, df, wait_to_complete=True
        )

    try:
        _upload()
    except DatasetNotFoundException:
        # No dataset yet: derive a schema from the DataFrame, register it,
        # then retry the upload.
        schema = rapid.generate_schema(
            df, metadata.layer, metadata.domain, metadata.dataset, metadata.sensitivity
        )
        rapid.create_schema(schema)
        _upload()
    except DataFrameUploadValidationException:
        if not upgrade_schema_on_fail:
            # Bare raise re-raises the active exception unchanged.
            raise
        update_schema_to_dataframe(rapid, metadata, df)
        _upload()


def update_schema_to_dataframe(
    rapid: Rapid,
    metadata: SchemaMetadata,
    df: DataFrame,
):
    """
    Updates the schema of a dataset in the API so that it matches the given DataFrame.

    A schema is generated from the DataFrame through the API; its columns are
    combined with the supplied metadata into a new Schema, which is then sent
    as the schema update.

    Args:
        rapid (Rapid): An instance of the rAPId SDK's main class.
        metadata (SchemaMetadata): The metadata for the schema to be updated and the dataset the DataFrame belongs to.
        df (DataFrame): The DataFrame that the schema should be updated to match.
    """
    generated = rapid.generate_schema(
        df,
        metadata.layer,
        metadata.domain,
        metadata.dataset,
        metadata.sensitivity,
    )
    # Only the generated columns are reused; the caller's metadata is passed through as-is.
    rapid.update_schema(Schema(metadata=metadata, columns=generated.columns))
rapid.update_schema(schema)
27 changes: 19 additions & 8 deletions sdk/rapid/rapid.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def download_dataframe(
return pd.read_json(json.dumps(data), orient="index")

raise DatasetNotFoundException(
f"Could not find dataset, {domain}/{dataset} to download", data
f"Could not find dataset, {layer}/{domain}/{dataset} to download", data
)

def upload_dataframe(
Expand All @@ -159,8 +159,9 @@ def upload_dataframe(
wait_to_complete (bool, optional): Whether to wait for the upload job to complete before returning. Defaults to True.
Raises:
rapid.exceptions.DataFrameUploadValidationException: If the DataFrame's schema is incorrect.
rapid.exceptions.DataFrameUploadFailedException: If an unexpected error occurs while uploading the DataFrame.
rapid.exceptions.DataFrameUploadValidationException: If the DataFrame's schema is incorrect.
rapid.exceptions.DataFrameUploadFailedException: If an unexpected error occurs while uploading the DataFrame.
rapid.exceptions.DatasetNotFoundException: If the specified dataset does not exist.
Returns:
If wait_to_complete is True, returns "Success" if the upload is successful.
Expand All @@ -184,11 +185,15 @@ def upload_dataframe(
raise DataFrameUploadValidationException(
"Could not upload dataframe due to an incorrect schema definition"
)

raise DataFrameUploadFailedException(
"Encountered an unexpected error, could not upload dataframe",
data["details"],
)
elif response.status_code == 404:
raise DatasetNotFoundException(
"Could not find dataset: {layer}/{domain}/{dataset}", data
)
else:
raise DataFrameUploadFailedException(
"Encountered an unexpected error, could not upload dataframe",
data["details"],
)

def fetch_dataset_info(self, layer: str, domain: str, dataset: str):
"""
Expand All @@ -201,6 +206,7 @@ def fetch_dataset_info(self, layer: str, domain: str, dataset: str):
Raises:
rapid.exceptions.DatasetInfoFailedException: If an error occurs while fetching the dataset information.
rapid.exceptions.DatasetNotFoundException: If the specified dataset does not exist.
Returns:
A dictionary containing the metadata information for the dataset.
Expand All @@ -215,6 +221,11 @@ def fetch_dataset_info(self, layer: str, domain: str, dataset: str):
if response.status_code == 200:
return data

if response.status_code == 404:
raise DatasetNotFoundException(
f"Could not find dataset, {layer}/{domain}/{dataset} to get info", data
)

raise DatasetInfoFailedException(
"Failed to gather the dataset info", data["details"]
)
Expand Down
2 changes: 1 addition & 1 deletion sdk/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

setup(
name="rapid-sdk",
version="0.1.2",
version="0.1.3",
description="A python sdk for the rAPId API",
url="https://github.com/no10ds/rapid-sdk",
author="Lewis Card",
Expand Down
Loading

0 comments on commit a5df376

Please sign in to comment.