From ed549d2eedb9fb3512e92b7458a389e112b91520 Mon Sep 17 00:00:00 2001 From: "jonathan.kerr" Date: Tue, 30 Apr 2024 15:14:40 +0100 Subject: [PATCH 01/14] Set up basic test structure for Zendesk backup --- requirements.txt | 2 ++ tests/__init__.py | 0 tests/test_zendesk_backup.py | 28 ++++++++++++++++++++++++++++ 3 files changed, 30 insertions(+) create mode 100644 requirements.txt create mode 100644 tests/__init__.py create mode 100644 tests/test_zendesk_backup.py diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..89e2f35 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +-r lambda/zendesk-backup/requirements.txt +pytest \ No newline at end of file diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_zendesk_backup.py b/tests/test_zendesk_backup.py new file mode 100644 index 0000000..2d36cf2 --- /dev/null +++ b/tests/test_zendesk_backup.py @@ -0,0 +1,28 @@ +import pytest + + +@pytest.fixture +def zendesk_backup_event(): + return { + "_time": 1714446045.0087461, + "context": "LambdaContext([aws_request_id=1f6df4b9-0a8e-434d-87e5-00f59f07c2f2," + "log_group_name=/aws/lambda/zendesk-backup,log_stream_name=2024/04/30/[" + "$LATEST]20ac904278394d5ba8163542dfa8e884,function_name=zendesk-backup,memory_limit_in_mb=512," + "function_version=$LATEST," + "invoked_function_arn=arn:aws:lambda:eu-west-2:468623140221:function:zendesk-backup," + "client_context=None,identity=CognitoIdentity([cognito_identity_id=None," + "cognito_identity_pool_id=None])])", + "event": { + "version": "0", + "id": "1697296e-2030-dda6-7f4a-ac16427e291a", + "detail-type": "Scheduled Event", + "source": "aws.events", + "account": "468623140221", + "time": "2024-04-30T03:00:00Z", + "region": "eu-west-2", + "resources": [ + "arn:aws:events:eu-west-2:468623140221:rule/lambda-zendesk-backup-event-rule" + ], + "detail": {} + } + } From 4a532e9b21eba9194d7a7e8e712053cadeaed567 Mon Sep 17 00:00:00 2001 From: "jonathan.kerr" Date: Tue, 30 Apr 2024 15:24:09 +0100 Subject: [PATCH 02/14] Clean up a function a little, and add docstring --- lambda/zendesk-backup/main.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/lambda/zendesk-backup/main.py b/lambda/zendesk-backup/main.py index 72161a9..0a9f175 100644 --- a/lambda/zendesk-backup/main.py +++ b/lambda/zendesk-backup/main.py @@ -1,5 +1,7 @@ import os import json +from typing import Optional + import boto3 import time import re @@ -54,12 +56,15 @@ def add_athena_datetimes(d: dict = {}) -> dict: return res -def save_support(ticket_ids: list = []): - tickets = [] +def save_support(ticket_ids: Optional[list] = None): + """ + Save support tickets from Zendesk. Additionally, add a datetime to them + :param ticket_ids: + :return: + """ if ticket_ids: - for ticket_id in ticket_ids: - tickets.append(zenpy_client.tickets(id=str(ticket_id))) + tickets = [zenpy_client.tickets(id=str(ticket_id)) for ticket_id in ticket_ids] else: tickets = zenpy_client.search_export(type="ticket") From ffb144cd1c8cbfe8f5695f213db538c0ceeafb6f Mon Sep 17 00:00:00 2001 From: "jonathan.kerr" Date: Tue, 30 Apr 2024 17:00:59 +0100 Subject: [PATCH 03/14] Rename zendesk-backup directory As noted elsewhere, modules in python need to have underscores, rather than hyphens. This, as well as the new init file, allows it to be imported for testing --- lambda/zendesk_backup/__init__.py | 0 .../build.sh | 0 .../eventbridge.tf | 0 .../lambda.tf | 0 .../main.py | 27 ++++++++++++------- .../requirements.txt | 0 6 files changed, 17 insertions(+), 10 deletions(-) create mode 100644 lambda/zendesk_backup/__init__.py rename lambda/{zendesk-backup => zendesk_backup}/build.sh (100%) rename lambda/{zendesk-backup => zendesk_backup}/eventbridge.tf (100%) rename lambda/{zendesk-backup => zendesk_backup}/lambda.tf (100%) rename lambda/{zendesk-backup => zendesk_backup}/main.py (84%) rename lambda/{zendesk-backup => zendesk_backup}/requirements.txt (100%) diff --git a/lambda/zendesk_backup/__init__.py b/lambda/zendesk_backup/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/lambda/zendesk-backup/build.sh b/lambda/zendesk_backup/build.sh similarity index 100% rename from lambda/zendesk-backup/build.sh rename to lambda/zendesk_backup/build.sh diff --git a/lambda/zendesk-backup/eventbridge.tf b/lambda/zendesk_backup/eventbridge.tf similarity index 100% rename from lambda/zendesk-backup/eventbridge.tf rename to lambda/zendesk_backup/eventbridge.tf diff --git a/lambda/zendesk-backup/lambda.tf b/lambda/zendesk_backup/lambda.tf similarity index 100% rename from lambda/zendesk-backup/lambda.tf rename to lambda/zendesk_backup/lambda.tf diff --git a/lambda/zendesk-backup/main.py b/lambda/zendesk_backup/main.py similarity index 84% rename from lambda/zendesk-backup/main.py rename to lambda/zendesk_backup/main.py index 0a9f175..4bff9b1 100644 --- a/lambda/zendesk-backup/main.py +++ b/lambda/zendesk_backup/main.py @@ -1,6 +1,6 @@ import os import json -from typing import Optional +from typing import Optional, Union import boto3 import time @@ -43,16 +43,23 @@ def get_key(obj: dict) -> str: return res -def add_athena_datetimes(d: dict = {}) -> dict: +def add_athena_datetimes(json_dict: dict[str, Union[int, str, dict, list]]) -> dict: + """ + Take a JSON formatted dictionary. Find a string that contains this pattern: a 2, followed by any number of digits, + followed by a T/t, followed by two digits, followed by a colon. Having found that, replace the 't' with a space, + remove the z (we are assuming there's a 'z' in this), and then split the string on the dots. Only take the first + section of this newly split string, and add it back to the dictionary under the key f"{key}_athena" + + :param json_dict: + :return: + """ res = {} - for key in d: - if d[key] and type(d[key]) == str: - if re.match("(?i)2[\d\-]+t\d\d:", d[key]): - res[f"{key}_athena"] = ( - d[key].lower().replace("t", " ").replace("z", "").split(".")[0] - ) - - res.update(d) + for key, value in json_dict.items(): + if type(value) is str: + if re.match(r"(?i)2[\d\-]+t\d\d:", value): + res[f"{key}_athena"] = (value.lower().replace("t", " ").replace("z", "").split(".")[0]) + + res.update(json_dict) return res diff --git a/lambda/zendesk-backup/requirements.txt b/lambda/zendesk_backup/requirements.txt similarity index 100% rename from lambda/zendesk-backup/requirements.txt rename to lambda/zendesk_backup/requirements.txt From a7be28104887b6efd508e92e5b9fea38bdc2d1be Mon Sep 17 00:00:00 2001 From: "jonathan.kerr" Date: Tue, 30 Apr 2024 17:06:35 +0100 Subject: [PATCH 04/14] Rename lambda package This package has been renamed because 'lambda' is a protected term in python, and was preventing me from testing the software --- ...lambda_crawler-govuk-reference-content_deploy.yml | 12 ++++++------ .github/workflows/lambda_email-forwarder_deploy.yml | 8 ++++---- .../lambda_hackerone-zendesk-integration_deploy.yml | 10 +++++----- .github/workflows/lambda_zendesk-backup_deploy.yml | 10 +++++----- .../crawler-govuk-reference-content/build.sh | 0 .../dev-requirements.txt | 0 .../crawler-govuk-reference-content/lambda.tf | 0 .../crawler-govuk-reference-content/main.py | 0 {lambda => lambda_}/email-forwarder/aws.tf | 0 {lambda => lambda_}/email-forwarder/lambda.tf | 0 {lambda => lambda_}/email-forwarder/main.py | 0 {lambda => lambda_}/email-forwarder/s3.tf | 0 {lambda => lambda_}/email-forwarder/variables.tf | 0 .../hackerone-zendesk-integration/build.sh | 0 .../hackerone-zendesk-integration/hackerone.py | 0 .../hackerone-zendesk-integration/lambda.tf | 0 .../hackerone-zendesk-integration/main.py | 0 .../hackerone-zendesk-integration/requirements.txt | 0 .../hackerone-zendesk-integration/zendesk.py | 0 .../maintenance-load-athena-partitions/main.py | 0 {lambda => lambda_}/zendesk_backup/__init__.py | 0 {lambda => lambda_}/zendesk_backup/build.sh | 0 {lambda => lambda_}/zendesk_backup/eventbridge.tf | 0 {lambda => lambda_}/zendesk_backup/lambda.tf | 0 {lambda => lambda_}/zendesk_backup/main.py | 0 {lambda => lambda_}/zendesk_backup/requirements.txt | 0 26 files changed, 20 insertions(+), 20 deletions(-) rename {lambda => lambda_}/crawler-govuk-reference-content/build.sh (100%) rename {lambda => lambda_}/crawler-govuk-reference-content/dev-requirements.txt (100%) rename {lambda => lambda_}/crawler-govuk-reference-content/lambda.tf (100%) rename {lambda => lambda_}/crawler-govuk-reference-content/main.py (100%) rename {lambda => lambda_}/email-forwarder/aws.tf (100%) rename {lambda => lambda_}/email-forwarder/lambda.tf (100%) rename {lambda => lambda_}/email-forwarder/main.py (100%) rename {lambda => lambda_}/email-forwarder/s3.tf (100%) rename {lambda => lambda_}/email-forwarder/variables.tf (100%) rename {lambda => lambda_}/hackerone-zendesk-integration/build.sh (100%) rename {lambda => lambda_}/hackerone-zendesk-integration/hackerone.py (100%) rename {lambda => lambda_}/hackerone-zendesk-integration/lambda.tf (100%) rename {lambda => lambda_}/hackerone-zendesk-integration/main.py (100%) rename {lambda => lambda_}/hackerone-zendesk-integration/requirements.txt (100%) rename {lambda => lambda_}/hackerone-zendesk-integration/zendesk.py (100%) rename {lambda => lambda_}/maintenance-load-athena-partitions/main.py (100%) rename {lambda => lambda_}/zendesk_backup/__init__.py (100%) rename {lambda => lambda_}/zendesk_backup/build.sh (100%) rename {lambda => lambda_}/zendesk_backup/eventbridge.tf (100%) rename {lambda => lambda_}/zendesk_backup/lambda.tf (100%) rename {lambda => lambda_}/zendesk_backup/main.py (100%) rename {lambda => lambda_}/zendesk_backup/requirements.txt (100%) diff --git a/.github/workflows/lambda_crawler-govuk-reference-content_deploy.yml b/.github/workflows/lambda_crawler-govuk-reference-content_deploy.yml index 5fa66e0..6c6090f 100644 --- a/.github/workflows/lambda_crawler-govuk-reference-content_deploy.yml +++ b/.github/workflows/lambda_crawler-govuk-reference-content_deploy.yml @@ -4,7 +4,7 @@ on: push: branches: [ "main" ] paths: - - lambda/crawler-govuk-reference-content/** + - lambda_/crawler-govuk-reference-content/** - .github/workflows/lambda_crawler-govuk-reference-content_deploy.yml workflow_dispatch: env: @@ -48,7 +48,7 @@ jobs: echo "github.ref: ${{ github.ref }}" ls -lah bash build.sh - working-directory: lambda/crawler-govuk-reference-content/ + working-directory: lambda_/crawler-govuk-reference-content/ - name: configure aws credentials uses: aws-actions/configure-aws-credentials@v2 @@ -61,17 +61,17 @@ jobs: run: | aws sts get-caller-identity ls -lah - working-directory: lambda/crawler-govuk-reference-content/ + working-directory: lambda_/crawler-govuk-reference-content/ # Initialize a new or existing Terraform working directory by creating initial files, loading any remote state, downloading modules, etc. - name: Terraform Init run: terraform init - working-directory: lambda/crawler-govuk-reference-content/ + working-directory: lambda_/crawler-govuk-reference-content/ # Checks that all Terraform configuration files adhere to a canonical format - name: Terraform Format run: terraform fmt -check - working-directory: lambda/crawler-govuk-reference-content/ + working-directory: lambda_/crawler-govuk-reference-content/ # Generates an execution plan for Terraform - name: Terraform Apply @@ -83,4 +83,4 @@ jobs: -var="production_iam_role=${{ secrets.PRODUCTION_IAM_ROLE }}" env: TERRAFORM_WORKSPACE: ${{ matrix.environment }} - working-directory: lambda/crawler-govuk-reference-content/ + working-directory: lambda_/crawler-govuk-reference-content/ diff --git a/.github/workflows/lambda_email-forwarder_deploy.yml b/.github/workflows/lambda_email-forwarder_deploy.yml index d121150..d0e47ef 100644 --- a/.github/workflows/lambda_email-forwarder_deploy.yml +++ b/.github/workflows/lambda_email-forwarder_deploy.yml @@ -4,7 +4,7 @@ on: push: branches: [ main ] paths: - - lambda/email-forwarder/** + - lambda_/email-forwarder/** workflow_dispatch: permissions: @@ -52,12 +52,12 @@ jobs: # Initialize a new or existing Terraform working directory by creating initial files, loading any remote state, downloading modules, etc. - name: Terraform Init run: terraform init - working-directory: lambda/email-forwarder/ + working-directory: lambda_/email-forwarder/ # Checks that all Terraform configuration files adhere to a canonical format - name: Terraform Format run: terraform fmt -check - working-directory: lambda/email-forwarder/ + working-directory: lambda_/email-forwarder/ # Generates an execution plan for Terraform - name: Terraform Apply @@ -68,4 +68,4 @@ jobs: -var="production_iam_role=${{ secrets.PRODUCTION_IAM_ROLE }}" env: TF_WORKSPACE: ${{ matrix.environment }} - working-directory: lambda/email-forwarder/ + working-directory: lambda_/email-forwarder/ diff --git a/.github/workflows/lambda_hackerone-zendesk-integration_deploy.yml b/.github/workflows/lambda_hackerone-zendesk-integration_deploy.yml index fe03168..48b8e9c 100644 --- a/.github/workflows/lambda_hackerone-zendesk-integration_deploy.yml +++ b/.github/workflows/lambda_hackerone-zendesk-integration_deploy.yml @@ -4,7 +4,7 @@ on: push: branches: [ main ] paths: - - lambda/hackerone-zendesk-integration/** + - lambda_/hackerone-zendesk-integration/** workflow_dispatch: permissions: @@ -42,7 +42,7 @@ jobs: run: | ls -lah bash build.sh - working-directory: lambda/hackerone-zendesk-integration/ + working-directory: lambda_/hackerone-zendesk-integration/ - name: Configure AWS credentials uses: aws-actions/configure-aws-credentials@v2 @@ -58,12 +58,12 @@ jobs: # Initialize a new or existing Terraform working directory by creating initial files, loading any remote state, downloading modules, etc. - name: Terraform Init run: terraform init - working-directory: lambda/hackerone-zendesk-integration/ + working-directory: lambda_/hackerone-zendesk-integration/ # Checks that all Terraform configuration files adhere to a canonical format - name: Terraform Format run: terraform fmt -check - working-directory: lambda/hackerone-zendesk-integration/ + working-directory: lambda_/hackerone-zendesk-integration/ # Generates an execution plan for Terraform - name: Terraform Apply @@ -74,4 +74,4 @@ jobs: -var="production_iam_role=${{ secrets.PRODUCTION_IAM_ROLE }}" env: TF_WORKSPACE: ${{ matrix.environment }} - working-directory: lambda/hackerone-zendesk-integration/ + working-directory: lambda_/hackerone-zendesk-integration/ diff --git a/.github/workflows/lambda_zendesk-backup_deploy.yml b/.github/workflows/lambda_zendesk-backup_deploy.yml index 98bd5c8..2e2dff6 100644 --- a/.github/workflows/lambda_zendesk-backup_deploy.yml +++ b/.github/workflows/lambda_zendesk-backup_deploy.yml @@ -4,7 +4,7 @@ on: push: branches: [ main ] paths: - - lambda/zendesk-backup/** + - lambda_/zendesk_backup/** workflow_dispatch: permissions: @@ -42,7 +42,7 @@ jobs: run: | ls -lah bash build.sh - working-directory: lambda/zendesk-backup/ + working-directory: lambda_/zendesk_backup/ - name: Configure AWS credentials uses: aws-actions/configure-aws-credentials@v2 @@ -58,12 +58,12 @@ jobs: # Initialize a new or existing Terraform working directory by creating initial files, loading any remote state, downloading modules, etc. - name: Terraform Init run: terraform init - working-directory: lambda/zendesk-backup/ + working-directory: lambda_/zendesk_backup/ # Checks that all Terraform configuration files adhere to a canonical format - name: Terraform Format run: terraform fmt -check - working-directory: lambda/zendesk-backup/ + working-directory: lambda_/zendesk_backup/ # Generates an execution plan for Terraform - name: Terraform Apply @@ -74,4 +74,4 @@ jobs: -var="production_iam_role=${{ secrets.PRODUCTION_IAM_ROLE }}" env: TF_WORKSPACE: ${{ matrix.environment }} - working-directory: lambda/zendesk-backup/ + working-directory: lambda_/zendesk_backup/ diff --git a/lambda/crawler-govuk-reference-content/build.sh b/lambda_/crawler-govuk-reference-content/build.sh similarity index 100% rename from lambda/crawler-govuk-reference-content/build.sh rename to lambda_/crawler-govuk-reference-content/build.sh diff --git a/lambda/crawler-govuk-reference-content/dev-requirements.txt b/lambda_/crawler-govuk-reference-content/dev-requirements.txt similarity index 100% rename from lambda/crawler-govuk-reference-content/dev-requirements.txt rename to lambda_/crawler-govuk-reference-content/dev-requirements.txt diff --git a/lambda/crawler-govuk-reference-content/lambda.tf b/lambda_/crawler-govuk-reference-content/lambda.tf similarity index 100% rename from lambda/crawler-govuk-reference-content/lambda.tf rename to lambda_/crawler-govuk-reference-content/lambda.tf diff --git a/lambda/crawler-govuk-reference-content/main.py b/lambda_/crawler-govuk-reference-content/main.py similarity index 100% rename from lambda/crawler-govuk-reference-content/main.py rename to lambda_/crawler-govuk-reference-content/main.py diff --git a/lambda/email-forwarder/aws.tf b/lambda_/email-forwarder/aws.tf similarity index 100% rename from lambda/email-forwarder/aws.tf rename to lambda_/email-forwarder/aws.tf diff --git a/lambda/email-forwarder/lambda.tf b/lambda_/email-forwarder/lambda.tf similarity index 100% rename from lambda/email-forwarder/lambda.tf rename to lambda_/email-forwarder/lambda.tf diff --git a/lambda/email-forwarder/main.py b/lambda_/email-forwarder/main.py similarity index 100% rename from lambda/email-forwarder/main.py rename to lambda_/email-forwarder/main.py diff --git a/lambda/email-forwarder/s3.tf b/lambda_/email-forwarder/s3.tf similarity index 100% rename from lambda/email-forwarder/s3.tf rename to lambda_/email-forwarder/s3.tf diff --git a/lambda/email-forwarder/variables.tf b/lambda_/email-forwarder/variables.tf similarity index 100% rename from lambda/email-forwarder/variables.tf rename to lambda_/email-forwarder/variables.tf diff --git a/lambda/hackerone-zendesk-integration/build.sh b/lambda_/hackerone-zendesk-integration/build.sh similarity index 100% rename from lambda/hackerone-zendesk-integration/build.sh rename to lambda_/hackerone-zendesk-integration/build.sh diff --git a/lambda/hackerone-zendesk-integration/hackerone.py b/lambda_/hackerone-zendesk-integration/hackerone.py similarity index 100% rename from lambda/hackerone-zendesk-integration/hackerone.py rename to lambda_/hackerone-zendesk-integration/hackerone.py diff --git a/lambda/hackerone-zendesk-integration/lambda.tf b/lambda_/hackerone-zendesk-integration/lambda.tf similarity index 100% rename from lambda/hackerone-zendesk-integration/lambda.tf rename to lambda_/hackerone-zendesk-integration/lambda.tf diff --git a/lambda/hackerone-zendesk-integration/main.py b/lambda_/hackerone-zendesk-integration/main.py similarity index 100% rename from lambda/hackerone-zendesk-integration/main.py rename to lambda_/hackerone-zendesk-integration/main.py diff --git a/lambda/hackerone-zendesk-integration/requirements.txt b/lambda_/hackerone-zendesk-integration/requirements.txt similarity index 100% rename from lambda/hackerone-zendesk-integration/requirements.txt rename to lambda_/hackerone-zendesk-integration/requirements.txt diff --git a/lambda/hackerone-zendesk-integration/zendesk.py b/lambda_/hackerone-zendesk-integration/zendesk.py similarity index 100% rename from lambda/hackerone-zendesk-integration/zendesk.py rename to lambda_/hackerone-zendesk-integration/zendesk.py diff --git a/lambda/maintenance-load-athena-partitions/main.py b/lambda_/maintenance-load-athena-partitions/main.py similarity index 100% rename from lambda/maintenance-load-athena-partitions/main.py rename to lambda_/maintenance-load-athena-partitions/main.py diff --git a/lambda/zendesk_backup/__init__.py b/lambda_/zendesk_backup/__init__.py similarity index 100% rename from lambda/zendesk_backup/__init__.py rename to lambda_/zendesk_backup/__init__.py diff --git a/lambda/zendesk_backup/build.sh b/lambda_/zendesk_backup/build.sh similarity index 100% rename from lambda/zendesk_backup/build.sh rename to lambda_/zendesk_backup/build.sh diff --git a/lambda/zendesk_backup/eventbridge.tf b/lambda_/zendesk_backup/eventbridge.tf similarity index 100% rename from lambda/zendesk_backup/eventbridge.tf rename to lambda_/zendesk_backup/eventbridge.tf diff --git a/lambda/zendesk_backup/lambda.tf b/lambda_/zendesk_backup/lambda.tf similarity index 100% rename from lambda/zendesk_backup/lambda.tf rename to lambda_/zendesk_backup/lambda.tf diff --git a/lambda/zendesk_backup/main.py b/lambda_/zendesk_backup/main.py similarity index 100% rename from lambda/zendesk_backup/main.py rename to lambda_/zendesk_backup/main.py diff --git a/lambda/zendesk_backup/requirements.txt b/lambda_/zendesk_backup/requirements.txt similarity index 100% rename from lambda/zendesk_backup/requirements.txt rename to lambda_/zendesk_backup/requirements.txt From 71bc1f480dbca9d7f2713e3fe0e739e39ac8e62d Mon Sep 17 00:00:00 2001 From: "jonathan.kerr" Date: Tue, 30 Apr 2024 17:40:58 +0100 Subject: [PATCH 05/14] Set up tests Set up the tests and fixtures --- tests/test_zendesk_backup.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/test_zendesk_backup.py b/tests/test_zendesk_backup.py index 2d36cf2..99a49e7 100644 --- a/tests/test_zendesk_backup.py +++ b/tests/test_zendesk_backup.py @@ -1,4 +1,14 @@ +import os +from unittest import mock + import pytest +from lambda_.zendesk_backup import main as zendesk_backup + + +@pytest.fixture(autouse=True) +def mock_env_vars(): + with mock.patch.dict(os.environ, values={}): + yield @pytest.fixture @@ -26,3 +36,12 @@ def zendesk_backup_event(): "detail": {} } } + +@pytest.fixture +def json_ticket() -> dict[str, str]: + return { + "created_at": "2024-03-15T15:50:18Z" + } + +def test_athena_datetime(json_ticket): + assert zendesk_backup.add_athena_datetimes(json_ticket) == {json_ticket.update({"athena_created_at": "2024-03-15 15:50:18"})} From 79f645068a831ee457874e55c5b487baa80cdae9 Mon Sep 17 00:00:00 2001 From: "jonathan.kerr" Date: Tue, 30 Apr 2024 17:51:09 +0100 Subject: [PATCH 06/14] Use functions to generate clients This enables us to test by mocking these clients out --- lambda_/zendesk_backup/main.py | 47 ++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/lambda_/zendesk_backup/main.py b/lambda_/zendesk_backup/main.py index 4bff9b1..325e253 100644 --- a/lambda_/zendesk_backup/main.py +++ b/lambda_/zendesk_backup/main.py @@ -5,22 +5,33 @@ import boto3 import time import re - +import functools from zenpy import Zenpy -s3_bucket = os.environ["S3_BUCKET"] -s3_client = boto3.client("s3") + +@functools.cache +def get_s3_bucket() -> str: + return os.environ["S3_BUCKET"] + + +@functools.cache +def s3_client(): + return boto3.client("s3") + s3_helpcentre_prefix = "helpcentre/" s3_support_prefix = "support/" -zendesk_creds = { - "email": os.environ["ZENDESK_API_EMAIL"], - "token": os.environ["ZENDESK_API_KEY"], - "subdomain": os.environ["ZENDESK_SUBDOMAIN"], -} -zenpy_client = Zenpy(**zendesk_creds) +@functools.cache +def zenpy_client() -> Zenpy: + zendesk_creds = { + "email": os.environ["ZENDESK_API_EMAIL"], + "token": os.environ["ZENDESK_API_KEY"], + "subdomain": os.environ["ZENDESK_SUBDOMAIN"], + } + + return Zenpy(**zendesk_creds) def jprint(obj): @@ -70,10 +81,11 @@ def save_support(ticket_ids: Optional[list] = None): :param ticket_ids: :return: """ + s3_bucket = get_s3_bucket if ticket_ids: - tickets = [zenpy_client.tickets(id=str(ticket_id)) for ticket_id in ticket_ids] + tickets = [zenpy_client().tickets(id=str(ticket_id)) for ticket_id in ticket_ids] else: - tickets = zenpy_client.search_export(type="ticket") + tickets = zenpy_client().search_export(type="ticket") for ticket in tickets: # subject = re.sub(r"\s+", " ", re.sub(r"[^a-zA-Z0-9 ]", "", ticket.raw_subject)) @@ -83,7 +95,7 @@ def save_support(ticket_ids: Optional[list] = None): dobj = add_athena_datetimes(ticket.to_dict()) - s3_client.put_object( + s3_client().put_object( Body=json.dumps(dobj, default=str).encode("utf-8"), Bucket=s3_bucket, Key=key, @@ -91,16 +103,17 @@ def save_support(ticket_ids: Optional[list] = None): def save_helpcentre(article_ids: list = []): + s3_bucket = get_s3_bucket() files = {} - categories = zenpy_client.help_center.categories() + categories = zenpy_client().help_center.categories() for category in categories: category_key = get_key(category.to_dict()) if category_key: if article_ids == []: files[category_key] = category.to_dict() - sections = zenpy_client.help_center.sections(category_id=category.id) + sections = zenpy_client().help_center.sections(category_id=category.id) for section in sections: if section.category_id == category.id: section_ref = get_key(section.to_dict()) @@ -109,7 +122,7 @@ def save_helpcentre(article_ids: list = []): if article_ids == []: files[section_key] = section.to_dict() - articles = zenpy_client.help_center.articles( + articles = zenpy_client().help_center.articles( section_id=section.id ) for article in articles: @@ -133,14 +146,14 @@ def save_helpcentre(article_ids: list = []): wdt = add_athena_datetimes(file_obj) jprint(f"Saving 's3://{s3_bucket}/{s3_helpcentre_prefix}{filename}'") - s3_client.put_object( + s3_client().put_object( Body=json.dumps(wdt, default=str).encode("utf-8"), Bucket=s3_bucket, Key=f"{s3_helpcentre_prefix}{filename}", ) if html and html_filename: jprint(f"Saving 's3://{s3_bucket}/{s3_helpcentre_prefix}{html_filename}'") - s3_client.put_object( + s3_client().put_object( Body=html.encode("utf-8"), Bucket=s3_bucket, Key=f"{s3_helpcentre_prefix}{html_filename}", From c46c96308220054d782eead7c5c3c189c43b509f Mon Sep 17 00:00:00 2001 From: "jonathan.kerr" Date: Tue, 30 Apr 2024 17:57:37 +0100 Subject: [PATCH 07/14] Add small test for athena_datetime function --- tests/test_zendesk_backup.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/test_zendesk_backup.py b/tests/test_zendesk_backup.py index 99a49e7..1db5f27 100644 --- a/tests/test_zendesk_backup.py +++ b/tests/test_zendesk_backup.py @@ -7,7 +7,7 @@ @pytest.fixture(autouse=True) def mock_env_vars(): - with mock.patch.dict(os.environ, values={}): + with mock.patch.dict(os.environ, values={"S3_BUCKET": "test"}): yield @@ -37,11 +37,15 @@ def zendesk_backup_event(): } } + @pytest.fixture def json_ticket() -> dict[str, str]: return { "created_at": "2024-03-15T15:50:18Z" } + def test_athena_datetime(json_ticket): - assert zendesk_backup.add_athena_datetimes(json_ticket) == {json_ticket.update({"athena_created_at": "2024-03-15 15:50:18"})} + ticket_under_test = zendesk_backup.add_athena_datetimes(json_ticket) + json_ticket.update({"created_at_athena": "2024-03-15 15:50:18"}) + assert ticket_under_test == json_ticket From 3fbe9cd9f0f694450500adddb98dc95c912fab28 Mon Sep 17 00:00:00 2001 From: "jonathan.kerr" Date: Wed, 1 May 2024 09:22:19 +0100 Subject: [PATCH 08/14] Small simplification to athena_datetime function --- lambda_/zendesk_backup/main.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lambda_/zendesk_backup/main.py b/lambda_/zendesk_backup/main.py index 325e253..6ef778a 100644 --- a/lambda_/zendesk_backup/main.py +++ b/lambda_/zendesk_backup/main.py @@ -66,9 +66,8 @@ def add_athena_datetimes(json_dict: dict[str, Union[int, str, dict, list]]) -> d """ res = {} for key, value in json_dict.items(): - if type(value) is str: - if re.match(r"(?i)2[\d\-]+t\d\d:", value): - res[f"{key}_athena"] = (value.lower().replace("t", " ").replace("z", "").split(".")[0]) + if type(value) is str and re.match(r"(?i)2[\d\-]+t\d\d:", value): + res[f"{key}_athena"] = (value.lower().replace("t", " ").replace("z", "").split(".")[0]) res.update(json_dict) return res From 7408d01c19ea0f57e8e7d81606287e05cd65fcae Mon Sep 17 00:00:00 2001 From: "jonathan.kerr" Date: Wed, 1 May 2024 09:30:46 +0100 Subject: [PATCH 09/14] Test and document the lambda handler At the moment most of the lambda handler doesn't seem to do anything. However, I'm reluctant to delete the code as I'm not 100% certain. I've documented it as best as I can, and used an actual event from AWS as input to the test --- lambda_/zendesk_backup/main.py | 8 ++++++++ tests/test_zendesk_backup.py | 9 ++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/lambda_/zendesk_backup/main.py b/lambda_/zendesk_backup/main.py index 6ef778a..6ae5554 100644 --- a/lambda_/zendesk_backup/main.py +++ b/lambda_/zendesk_backup/main.py @@ -160,6 +160,14 @@ def save_helpcentre(article_ids: list = []): def lambda_handler(event, context): + """ + This is the lambda handler for the code above. At the moment, the only path that's covered is the final 'else'. In + future, we can send slightly different events through the EventBridge cron job. + + :param event: + :param context: + :return: + """ try: jprint({"event": event, "context": context}) diff --git a/tests/test_zendesk_backup.py b/tests/test_zendesk_backup.py index 1db5f27..ecfa8c5 100644 --- a/tests/test_zendesk_backup.py +++ b/tests/test_zendesk_backup.py @@ -14,7 +14,6 @@ def mock_env_vars(): @pytest.fixture def zendesk_backup_event(): return { - "_time": 1714446045.0087461, "context": "LambdaContext([aws_request_id=1f6df4b9-0a8e-434d-87e5-00f59f07c2f2," "log_group_name=/aws/lambda/zendesk-backup,log_stream_name=2024/04/30/[" "$LATEST]20ac904278394d5ba8163542dfa8e884,function_name=zendesk-backup,memory_limit_in_mb=512," @@ -49,3 +48,11 @@ def test_athena_datetime(json_ticket): ticket_under_test = zendesk_backup.add_athena_datetimes(json_ticket) json_ticket.update({"created_at_athena": "2024-03-15 15:50:18"}) assert ticket_under_test == json_ticket + + +def test_lambda_handler(zendesk_backup_event): + path = "lambda_.zendesk_backup.main" + with mock.patch(f"{path}.save_helpcentre") as mock_save_helpcentre, mock.patch(f"{path}.save_support") as mock_save_support: + zendesk_backup.lambda_handler(**zendesk_backup_event) + mock_save_helpcentre.assert_called_once_with() + mock_save_support.assert_called_once_with() From c66a36b9106a2c8c5d660f8d9cac1bcfb934b127 Mon Sep 17 00:00:00 2001 From: "jonathan.kerr" Date: Wed, 1 May 2024 10:37:35 +0100 Subject: [PATCH 10/14] Test and document the get_key function --- lambda_/zendesk_backup/main.py | 10 +++++++++- tests/test_zendesk_backup.py | 5 +++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/lambda_/zendesk_backup/main.py b/lambda_/zendesk_backup/main.py index 6ae5554..636fc40 100644 --- a/lambda_/zendesk_backup/main.py +++ b/lambda_/zendesk_backup/main.py @@ -45,7 +45,15 @@ def jprint(obj): print(json.dumps(new_obj, default=str)) -def get_key(obj: dict) -> str: +def get_key(obj: Optional[dict[str, Any]]) -> Optional[str]: + """ + Get a URL from a dictionary and return the string consisting of the last two slash-separated elements, ie: + >>> get_key({"html_url": "example.com/a/long/path"}) + long/path + + :param obj: + :return: + """ res = None if obj and "html_url" in obj and "/" in obj["html_url"]: html_url_split = obj["html_url"].rsplit("/", 2) diff --git a/tests/test_zendesk_backup.py b/tests/test_zendesk_backup.py index ecfa8c5..f37fd6b 100644 --- a/tests/test_zendesk_backup.py +++ b/tests/test_zendesk_backup.py @@ -56,3 +56,8 @@ def test_lambda_handler(zendesk_backup_event): zendesk_backup.lambda_handler(**zendesk_backup_event) mock_save_helpcentre.assert_called_once_with() mock_save_support.assert_called_once_with() + + +def test_get_key(): + dictionary = {"html_url": "example.com/a/long/path"} + assert zendesk_backup.get_key(dictionary) == "long/path" From 06bdc47f6c975a7bb4c078d5510fed8d652d718c Mon Sep 17 00:00:00 2001 From: "jonathan.kerr" Date: Wed, 1 May 2024 11:58:42 +0100 Subject: [PATCH 11/14] Create test for save_helpdesk function This function is enormous, and in order to refactor it I need to write a test to ensure that I know when it breaks --- lambda_/zendesk_backup/main.py | 26 +++++++++++++++++++++++++- tests/test_zendesk_backup.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/lambda_/zendesk_backup/main.py b/lambda_/zendesk_backup/main.py index 636fc40..5f1f002 100644 --- a/lambda_/zendesk_backup/main.py +++ b/lambda_/zendesk_backup/main.py @@ -1,7 +1,8 @@ +import dataclasses import os import json from typing import Optional, Union - +import da import boto3 import time import re @@ -109,6 +110,29 @@ def save_support(ticket_ids: Optional[list] = None): ) +@dataclasses.dataclass +class ZendeskObject: + html_url: str + id: str + + to_dict = dataclasses.asdict + + +@dataclasses.dataclass +class ZendeskCategory(ZendeskObject): + id: str + + +@dataclasses.dataclass +class ZendeskSection(ZendeskObject): + category_id: str + + +@dataclasses.dataclass +class ZendeskArticle(ZendeskObject): + section_id: str + + def save_helpcentre(article_ids: list = []): s3_bucket = get_s3_bucket() files = {} diff --git a/tests/test_zendesk_backup.py b/tests/test_zendesk_backup.py index f37fd6b..af0f535 100644 --- a/tests/test_zendesk_backup.py +++ b/tests/test_zendesk_backup.py @@ -1,8 +1,10 @@ import os from unittest import mock +from unittest.mock import Mock, call import pytest from lambda_.zendesk_backup import main as zendesk_backup +from lambda_.zendesk_backup.main import ZendeskCategory, ZendeskSection, ZendeskArticle @pytest.fixture(autouse=True) @@ -61,3 +63,29 @@ def test_lambda_handler(zendesk_backup_event): def test_get_key(): dictionary = {"html_url": "example.com/a/long/path"} assert zendesk_backup.get_key(dictionary) == "long/path" + + +@mock.patch("lambda_.zendesk_backup.main.zenpy_client") +@mock.patch("lambda_.zendesk_backup.main.s3_client") +def test_save_helpcenter(s3_client: Mock, zenpy_client: Mock): + category = ZendeskCategory("example.com/example/path", id="category_id") + section = ZendeskSection(category_id=category.id, html_url="example.com/section/path", id="section_id") + article = ZendeskArticle(html_url="example.com/article/path", section_id=section.id, id="article_id") + zenpy_client.return_value.help_center.categories.return_value = [category] + zenpy_client.return_value.help_center.sections.return_value = [section] + zenpy_client.return_value.help_center.articles.return_value = [article] + zendesk_backup.save_helpcentre() + s3_put: Mock = s3_client.return_value.put_object + s3_put.assert_has_calls( + [ + call(Body=b'{"html_url": "example.com/example/path", "id": "category_id"}', Bucket='test', + Key='helpcentre/example/path.json'), + call( + Body=b'{"html_url": "example.com/section/path", "id": "section_id", "category_id": "category_id"}', + Bucket='test', Key='helpcentre/example/path/section/path.json'), + call( + Body=b'{"html_url": "example.com/article/path", "id": "article_id", "section_id": "section_id"}', + Bucket='test', Key='helpcentre/example/path/section/path/article/path.json') + ], + any_order=True + ) From 9eff97806b599f879ea22b295f97ea94bd371ac8 Mon Sep 17 00:00:00 2001 From: "jonathan.kerr" Date: Wed, 1 May 2024 12:39:24 +0100 Subject: [PATCH 12/14] Minor refactor to clarify code This uses a single method to do all the messy extraction. However, I can see there's still significant repetition. I'm going to try to make it clearer still --- lambda_/zendesk_backup/main.py | 69 ++++++++++++++++++++++++---------- 1 file changed, 50 insertions(+), 19 deletions(-) diff --git a/lambda_/zendesk_backup/main.py b/lambda_/zendesk_backup/main.py index 5f1f002..e3f0752 100644 --- a/lambda_/zendesk_backup/main.py +++ b/lambda_/zendesk_backup/main.py @@ -1,8 +1,7 @@ import dataclasses import os import json -from typing import Optional, Union -import da +from typing import Optional, Union, Literal, Any import boto3 import time import re @@ -133,6 +132,36 @@ class ZendeskArticle(ZendeskObject): section_id: str +ObjectTypes = Literal["article", "section"] + + +def get_relations(subject: ObjectTypes) -> dict[str, ObjectTypes]: + relations = { + "article": { + "parent": "section" + }, + "section": { + "parent": "category" + } + } + return relations[subject] + + +def extract_substructure(object_type: ObjectTypes, zendesk_object: ZendeskObject, parent_id: str, parent_key: str, + article_ids: list) -> tuple[dict, str]: + relations = get_relations(object_type) + parent_type = relations["parent"] + substructure = {} + parent_type_id = f"{parent_type}_id" + if zendesk_object.__getattribute__(parent_type_id) == parent_id: + object_ref = get_key(zendesk_object.to_dict()) + if object_ref: + object_key = f"{parent_key}/{object_ref}" + if article_ids == [] or zendesk_object.id in article_ids: + substructure = {object_key: zendesk_object.to_dict()} + return substructure, object_key + + def save_helpcentre(article_ids: list = []): s3_bucket = get_s3_bucket() files = {} @@ -146,23 +175,25 @@ def save_helpcentre(article_ids: list = []): sections = zenpy_client().help_center.sections(category_id=category.id) for section in sections: - if section.category_id == category.id: - section_ref = get_key(section.to_dict()) - if section_ref: - section_key = f"{category_key}/{section_ref}" - if article_ids == []: - files[section_key] = section.to_dict() - - articles = zenpy_client().help_center.articles( - section_id=section.id - ) - for article in articles: - if article.section_id == section.id: - article_ref = get_key(article.to_dict()) - if article_ref: - article_key = f"{section_key}/{article_ref}" - if article_ids == [] or article.id in article_ids: - files[article_key] = article.to_dict() + section_file, section_key = extract_substructure( + object_type="section", + zendesk_object=section, + parent_id=category.id, + parent_key=category_key, + article_ids=article_ids + ) + files.update(section_file) + + articles = zenpy_client().help_center.articles(section_id=section.id) + for article in articles: + article_file, _ = extract_substructure( + object_type="article", + zendesk_object=article, + parent_id=section.id, + parent_key=section_key, + article_ids=article_ids, + ) + files.update(article_file) for file in files: filename = f"{file}.json" From b7075dfbc3d1a117caa201003c5925157cf11131 Mon Sep 17 00:00:00 2001 From: "jonathan.kerr" Date: Wed, 1 May 2024 12:53:34 +0100 Subject: [PATCH 13/14] Separate extracting helpcenter files from saving them This should make it clearer, in future, what does what and where any failures are --- lambda_/zendesk_backup/main.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/lambda_/zendesk_backup/main.py b/lambda_/zendesk_backup/main.py index e3f0752..6a8ee9c 100644 --- a/lambda_/zendesk_backup/main.py +++ b/lambda_/zendesk_backup/main.py @@ -162,15 +162,21 @@ def extract_substructure(object_type: ObjectTypes, zendesk_object: ZendeskObject return substructure, object_key -def save_helpcentre(article_ids: list = []): - s3_bucket = get_s3_bucket() +def extract_helpcenter(article_ids: list) -> dict[str, dict]: + """ + This takes a complex, nested set of dictionaries and flattens them into a more simple structure. With more time, + we could make this recursive and very simple. However, it's good enough as it is + + :param article_ids: + :return: + """ files = {} categories = zenpy_client().help_center.categories() for category in categories: category_key = get_key(category.to_dict()) if category_key: - if article_ids == []: + if not article_ids: files[category_key] = category.to_dict() sections = zenpy_client().help_center.sections(category_id=category.id) @@ -194,6 +200,15 @@ def save_helpcentre(article_ids: list = []): article_ids=article_ids, ) files.update(article_file) + return files + + +def save_helpcentre(article_ids=None): + if article_ids is None: + article_ids = [] + + s3_bucket = get_s3_bucket() + files = extract_helpcenter(article_ids) for file in files: filename = f"{file}.json" From e41979a5fbf3fb2bd7d0643199bf9e3e27d66e33 Mon Sep 17 00:00:00 2001 From: "jonathan.kerr" Date: Wed, 1 May 2024 14:10:21 +0100 Subject: [PATCH 14/14] Tidy up before pushing to production --- lambda_/zendesk_backup/main.py | 15 --------------- tests/test_zendesk_backup.py | 18 +++++++++++++++++- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/lambda_/zendesk_backup/main.py b/lambda_/zendesk_backup/main.py index 6a8ee9c..e826b17 100644 --- a/lambda_/zendesk_backup/main.py +++ b/lambda_/zendesk_backup/main.py @@ -117,21 +117,6 @@ class ZendeskObject: to_dict = dataclasses.asdict -@dataclasses.dataclass -class ZendeskCategory(ZendeskObject): - id: str - - -@dataclasses.dataclass -class ZendeskSection(ZendeskObject): - category_id: str - - -@dataclasses.dataclass -class ZendeskArticle(ZendeskObject): - section_id: str - - ObjectTypes = Literal["article", "section"] diff --git a/tests/test_zendesk_backup.py b/tests/test_zendesk_backup.py index af0f535..3cc60b7 100644 --- a/tests/test_zendesk_backup.py +++ b/tests/test_zendesk_backup.py @@ -1,10 +1,11 @@ +import dataclasses import os from unittest import mock from unittest.mock import Mock, call import pytest from lambda_.zendesk_backup import main as zendesk_backup -from lambda_.zendesk_backup.main import ZendeskCategory, ZendeskSection, ZendeskArticle +from lambda_.zendesk_backup.main import ZendeskObject @pytest.fixture(autouse=True) @@ -89,3 +90,18 @@ def test_save_helpcenter(s3_client: Mock, zenpy_client: Mock): ], any_order=True ) + + +@dataclasses.dataclass +class ZendeskCategory(ZendeskObject): + id: str + + +@dataclasses.dataclass +class ZendeskSection(ZendeskObject): + category_id: str + + +@dataclasses.dataclass +class ZendeskArticle(ZendeskObject): + section_id: str