From 291d832f4cb4ebeb43d128375541ff7fcebb0644 Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Thu, 10 Oct 2024 11:41:15 -0700 Subject: [PATCH 1/2] ingest-to-phylo: Output message for S3 URLs without hash I noticed that even though files were updated in ingest, the cache still didn't change and phylo did not run. Traced it back to incorrect S3 URLs leading to the use of the default hash in the cache. Output message to the GITHUB_STEP_SUMMARY so that we can immediately see if an S3 file doesn't have a sha256sum. --- .github/workflows/ingest-to-phylogenetic.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/ingest-to-phylogenetic.yaml b/.github/workflows/ingest-to-phylogenetic.yaml index 9254c13..7799478 100644 --- a/.github/workflows/ingest-to-phylogenetic.yaml +++ b/.github/workflows/ingest-to-phylogenetic.yaml @@ -80,6 +80,11 @@ jobs: key="${s3path#*/}" s3_hash="$(aws s3api head-object --no-sign-request --bucket "$bucket" --key "$key" --query Metadata.sha256sum --output text 2>/dev/null || echo "$no_hash")" + + if [[ "${s3_hash}" == "${no_hash}" ]]; then + echo "No Metadata.sha256sum found for ${s3_url}" >> "$GITHUB_STEP_SUMMARY" + fi + echo "${s3_hash}" | tee -a ingest-output-sha256sum done From 3f3fcb969c8ac1be5a32cdc5b651b61f927cb142 Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Thu, 10 Oct 2024 11:32:00 -0700 Subject: [PATCH 2/2] ingest-to-phylogenetic: Update cache `s3_urls` This workflow was originally copied from lassa which included the `all` prefix. Removing the prefix to match the WNV S3 files so that the cache can work as expected and trigger the phylo workflow. 
--- .github/workflows/ingest-to-phylogenetic.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ingest-to-phylogenetic.yaml b/.github/workflows/ingest-to-phylogenetic.yaml index 7799478..6a9b1ce 100644 --- a/.github/workflows/ingest-to-phylogenetic.yaml +++ b/.github/workflows/ingest-to-phylogenetic.yaml @@ -65,8 +65,8 @@ jobs: AWS_DEFAULT_REGION: ${{ vars.AWS_DEFAULT_REGION }} run: | s3_urls=( - "s3://nextstrain-data/files/workflows/WNV/all/metadata.tsv.zst" - "s3://nextstrain-data/files/workflows/WNV/all/sequences.fasta.zst" + "s3://nextstrain-data/files/workflows/WNV/metadata.tsv.zst" + "s3://nextstrain-data/files/workflows/WNV/sequences.fasta.zst" ) # Code below is modified from ingest/upload-to-s3