# GenBank fetch and ingest #1240
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
name: GenBank fetch and ingest

# Triggers: a daily schedule, repository_dispatch events sent by maintainers'
# tooling, and manual runs from the GitHub UI (workflow_dispatch).
on:
  schedule:
    # Times are in UTC, which is 1 hour behind CET (winter) and 2 hours behind
    # CEST (summer, daylight saving time).
    #
    # Currently, we aim to trigger ingest every day at 18:07 UTC, which is 19:07 CET (as of Mar 2022).
    # Note that the actual runs might be late. As of right now, the action starts around 20 past the hour.
    # Numerous people were confused about that, including me:
    #  - https://github.community/t/scheduled-action-running-consistently-late/138025/11
    #  - https://github.com/github/docs/issues/3059
    #
    # Note that '*' is a special character in YAML, so you have to quote this string.
    #
    # Docs:
    #  - https://docs.github.com/en/actions/learn-github-actions/events-that-trigger-workflows#schedule
    #
    # Tool that deciphers this particular format of crontab string:
    #  - https://crontab.guru/
    #
    # Looks like you are about to modify this schedule? Make sure you also modify the schedule for the
    # sister GISAID job, so that we don't need to keep two schedules in our heads.
    - cron: '7 18 * * *'

  # Manually triggered using `./vendored/trigger nextstrain/ncov-ingest genbank/fetch-and-ingest`
  # (or `fetch-and-ingest`, which includes GISAID).
  repository_dispatch:
    types:
      - genbank/fetch-and-ingest
      - fetch-and-ingest

  # Manually triggered using GitHub's UI.
  workflow_dispatch:
    inputs:
      # Optional. A non-empty value turns the run into a trial: uploads go to a
      # trial-specific S3 prefix and downstream triggers are disabled.
      trial_name:
        description: >-
          Short name for a trial run.
          If provided, files will be uploaded to s3://nextstrain-data/files/ncov/open/trial/${TRIAL_NAME}/
          and downstream ncov and forecasts-ncov workflows will not be triggered.
          WARNING: without the trial name, workflow will upload files to s3://nextstrain-data/files/ncov/open/
        required: false
      # Optional checkbox, ticked by default. Only consulted for manual runs;
      # other triggers always fetch from the upstream database.
      fetch_from_database:
        description: >-
          Whether the workflow should fetch from upstream database.
          If not selected (false), the workflow will start from existing data on S3.
        type: boolean
        required: false
        default: true
      # Optional. Overrides the container image used for the build.
      image:
        description: >-
          Specific container image to use for build.
          This will override the default image (nextstrain/ncov-ingest).
        required: false
jobs:
  # Translates the trigger type and manual inputs into a single string of
  # `--config key=value ...` overrides, exposed as the `config_overrides`
  # job output for the build job below.
  set_config_overrides:
    runs-on: ubuntu-latest
    steps:
      - id: config
        name: Set config overrides
        env:
          # Inputs are passed through the environment rather than interpolated
          # into the script text.
          TRIAL_NAME: ${{ inputs.trial_name }}
          # Evaluates to `true` for every trigger other than workflow_dispatch;
          # for manual runs it takes the value of the `fetch_from_database` input.
          FETCH_FROM_DATABASE: ${{ github.event_name != 'workflow_dispatch' && true || inputs.fetch_from_database }}
        run: |
          config="--config"

          # The expression result arrives as the string "true" or "false".
          if [[ "$FETCH_FROM_DATABASE" == true ]]; then
            config+=" fetch_from_database=True"
          else
            config+=" fetch_from_database=False"
          fi

          # A non-empty trial name disables downstream triggers and redirects
          # uploads to a trial-specific S3 prefix.
          if [[ "$TRIAL_NAME" ]]; then
            config+=" trigger_rebuild=False"
            config+=" trigger_counts=False"
            config+=" s3_dst=s3://nextstrain-data/files/ncov/open/trial/${TRIAL_NAME}"
          else
            config+=" trigger_rebuild=True"
            config+=" trigger_counts=True"
          fi

          echo "config=$config" >> "$GITHUB_OUTPUT"
    outputs:
      config_overrides: ${{ steps.config.outputs.config }}

  # Runs the GenBank ingest via the shared Nextstrain reusable workflow,
  # launching the build on AWS Batch in detached mode.
  fetch_and_ingest:
    needs: [set_config_overrides]
    permissions:
      id-token: write
    uses: nextstrain/.github/.github/workflows/pathogen-repo-build.yaml@master
    secrets: inherit
    with:
      runtime: aws-batch
      # `env` is a multi-line string of NAME: value pairs handed to the reusable
      # workflow. NOTE(review): presumably parsed there as YAML and exported as
      # environment variables for `run` - confirm against pathogen-repo-build.
      env: |
        CONFIG_OVERRIDES: ${{ needs.set_config_overrides.outputs.config_overrides }}
        GITHUB_RUN_ID: ${{ github.run_id }}
        NEXTSTRAIN_DOCKER_IMAGE: ${{ inputs.image || 'nextstrain/ncov-ingest' }}
        SLACK_CHANNELS: ${{ inputs.trial_name && vars.TEST_SLACK_CHANNEL || 'ncov-genbank-updates' }}
      # $CONFIG_OVERRIDES is intentionally left unquoted so that it word-splits
      # into `--config` followed by one argument per key=value pair.
      # NOTE(review): SLACK_TOKEN and GH_TOKEN_NEXTSTRAIN_BOT_WORKFLOW_DISPATCH
      # are not set in this file; presumably provided by the reusable workflow
      # from the inherited secrets - confirm.
      run: |
        nextstrain build \
          --aws-batch \
          --detach \
          --no-download \
          --cpus 36 \
          --memory 68GiB \
          --env GITHUB_RUN_ID \
          --env SLACK_TOKEN \
          --env SLACK_CHANNELS \
          --env PAT_GITHUB_DISPATCH="$GH_TOKEN_NEXTSTRAIN_BOT_WORKFLOW_DISPATCH" \
          . \
            --stats snakemake_stats.json \
            --configfile config/genbank.yaml \
            $CONFIG_OVERRIDES