Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge wdl to main #914

Merged
merged 1 commit into from
Apr 12, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .dockstore.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
version: 1.2
workflows:
- subclass: WDL
primaryDescriptorPath: /workflow/wdl/workflow.wdl
testParameterFiles:
- /workflow/wdl/input_ncov.json
authors:
- name: Nextstrain
5 changes: 5 additions & 0 deletions workflow/wdl/input_ncov.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"Nextstrain_WRKFLW.build_yaml" : "data/build.yaml",
"Nextstrain_WRKFLW.giturl" : "https://github.com/nextstrain/ncov/archive/refs/heads/master.zip",
"Nextstrain_WRKFLW.docker_path": "nextstrain/base:latest"
}
139 changes: 139 additions & 0 deletions workflow/wdl/workflow.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
version 1.0

# import "tasks/nextstrain.wdl" as nextstrain # <= modular method

workflow Nextstrain_WRKFLW {
input {
# Option 1: run the ncov example workflow
File? build_yaml

# Option 2: create a build_yaml from sequence and metadata
File? sequence_fasta
File? metadata_tsv
String? build_name
# It's possible all of the above files are provided

# Option 3? GISAID augur zip?
# File? gisaid_zip # tarball

String? active_builds # "Wisconsin,Minnesota,Iowa"

# By default, run the ncov workflow (can swap it for zika or something else)
String giturl = "https://github.com/nextstrain/ncov/archive/refs/heads/master.zip"
String docker_path = "nextstrain/base:latest"
Int? cpu
Int? memory # in GiB
Int? disk_size
}

if (defined(sequence_fasta)) {
call mk_buildconfig {
input:
sequence_fasta = select_first([sequence_fasta]),
metadata_tsv = select_first([metadata_tsv]),
build = build_name,
dockerImage = docker_path
}
}

# call nextstrain.nextstrain build as build { # <= modular method
call nextstrain_build as build {
input:
build_yaml = select_first([build_yaml, mk_buildconfig.buildconfig]), # Accepts Option 1 or Option 2
cpu = cpu,
memory = memory,
disk_size = disk_size,
dockerImage = docker_path,
giturl = giturl,
active_builds = active_builds
}

output {
Array[File] json_files = build.json_files
File auspice_zip = build.auspice_zip
}
}

# === Define Tasks
task mk_buildconfig {
input {
File sequence_fasta # Could change this to Array[File] and loop the HEREDOC
File metadata_tsv
String build = "example"
String dockerImage
Int cpu = 1
Int disk_size = 5
Float memory = 3.5
}
command {
cat << EOF > build.yaml
inputs:
- name: ~{build}
metadata: ~{metadata_tsv}
sequences: ~{sequence_fasta}
- name: references
metadata: data/references_metadata.tsv
sequences: data/references_sequences.fasta
EOF
}
output {
File buildconfig = "build.yaml"
}
runtime {
docker: dockerImage
cpu : cpu
memory: memory + " GiB"
disks: "local-disk " + disk_size + " HDD"
}
}

# Public Reference Datasets in case we want to add default "context" strains for ncov
# nextstrain remote ls s3://nextstrain-data/files/ncov/open &> list.txt
# files/ncov/open/LOCATION/metadata.tsv.xz where LOCATION = one of ['africa', 'asia', 'europe', 'global', 'north-america', 'oceania', 'south-america']
# files/ncov/open/LOCATION/sequences.fasta.xz
# TODO: since these are all s3, just generate string (avoid download and localazation/delocalization) for the mk_buildconfig
# Clarification: Do these need to be sampled down?

task nextstrain_build {
input {
File? build_yaml
String? active_builds # Wisconsin,Minnesota,Washington
String dockerImage = "nextstrain/base:latest"
String nextstrain_app = "nextstrain"
String giturl = "https://github.com/nextstrain/ncov/archive/refs/heads/master.zip"
Int cpu = 8 # Honestly, I'd max this out unless budget is a consideration.
Int disk_size = 30 # In GiB. Could also check size of sequence or metadata files
Float memory = 3.5
}
command {
# Pull ncov, zika or similar repository
wget -O master.zip ~{giturl}
INDIR=`unzip -Z1 master.zip | head -n1 | sed 's:/::g'`
unzip master.zip

# Max out the number of threads
PROC=`nproc`

# Run nextstrain
"~{nextstrain_app}" build \
--cpus $PROC \
--memory ~{memory}Gib \
--native $INDIR ~{"--configfile " + build_yaml} \
~{"--config active_builds=" + active_builds}

# Prepare output
mv $INDIR/auspice .
zip -r auspice.zip auspice
}
output {
File auspice_zip = "auspice.zip"
Array[File] json_files = glob("auspice/*.json")
# Target the s3
}
runtime {
docker: dockerImage
cpu : cpu
memory: memory + " GiB"
disks: "local-disk " + disk_size + " HDD"
}
}