-
Notifications
You must be signed in to change notification settings - Fork 25
update Dockerfile and Makefile for Canu version update. add parameter in Canu.wdl. #376
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,9 +1,12 @@ | ||
| FROM openjdk:8 | ||
|
|
||
| RUN apt-get update && \ | ||
| apt-get install libgomp1 | ||
| apt-get install libgomp1 tree | ||
|
|
||
| RUN wget https://github.com/marbl/canu/releases/download/v2.0/canu-2.0.Linux-amd64.tar.xz && \ | ||
| tar -xvf canu-2.0.Linux-amd64.tar.xz | ||
|
|
||
| ENV PATH="/canu-2.0/Linux-amd64/bin:${PATH}" | ||
| RUN wget https://github.com/marbl/canu/releases/download/v2.2/canu-2.2.Linux-amd64.tar.xz && \ | ||
| tar -xJf canu-2.2.*.tar.xz | ||
|
|
||
|
|
||
| ENV PATH="/canu-2.2/bin:${PATH}" | ||
| RUN canu |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,17 +1,15 @@ | ||
| IMAGE_NAME = lr-canu | ||
| VERSION = 0.1.0 | ||
|
|
||
| TAG1 = us.gcr.io/broad-dsp-lrma/$(IMAGE_NAME):$(VERSION) | ||
| TAG2 = us.gcr.io/broad-dsp-lrma/$(IMAGE_NAME):latest | ||
| VERSION = 0.2.0 | ||
|
|
||
| TAG1 = us.gcr.io/broad-dsp-lrma/lr-canu:$(VERSION) | ||
| TAG2 = us.gcr.io/broad-dsp-lrma/lr-canu:latest | ||
|
|
||
|
|
||
| all: | build push | ||
|
|
||
| build: | ||
| docker build -t $(TAG1) -t $(TAG2) . | ||
|
|
||
| build_no_cache: | ||
| docker build --no-cache -t $(TAG1) -t $(TAG2) . | ||
| docker build -t $(TAG1) -t $(TAG2) . | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. check tabs and spaces |
||
|
|
||
| push: | ||
| docker push $(TAG1) | ||
| docker push $(TAG2) | ||
| docker push $(TAG1) | ||
| docker push $(TAG2) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,29 +4,31 @@ version 1.0 | |
| # A workflow that runs the Canu 3-step assembly (correct, trim, assemble). | ||
| # - Tested on a small genome (malaria ~23mb), larger genomes may require some changes | ||
| # including tweaks to the default resource allocation. | ||
| # - Currently assumes nanopore reads | ||
|
|
||
| ########################################################################################## | ||
|
|
||
| import "Structs.wdl" | ||
|
|
||
| workflow Canu { | ||
| input { | ||
| File reads | ||
|
|
||
| String technology | ||
| Int genome_size | ||
| Float correct_error_rate | ||
| Float trim_error_rate | ||
| Float assemble_error_rate | ||
|
|
||
| String prefix | ||
| Int corrected_coverage | ||
| } | ||
|
|
||
| call Correct { | ||
| input: | ||
| reads = reads, | ||
| corrected_coverage = corrected_coverage, | ||
| genome_size = genome_size, | ||
| error_rate = correct_error_rate, | ||
| prefix = prefix | ||
| prefix = prefix, | ||
| technology = technology | ||
| } | ||
|
|
||
| call Trim { | ||
|
|
@@ -35,6 +37,7 @@ workflow Canu { | |
| corrected_reads = Correct.corrected_reads, | ||
| error_rate = trim_error_rate, | ||
| prefix = prefix, | ||
| technology = technology | ||
| } | ||
|
|
||
| call Assemble { | ||
|
|
@@ -43,21 +46,24 @@ workflow Canu { | |
| trimmed_reads = Trim.trimmed_reads, | ||
| error_rate = assemble_error_rate, | ||
| prefix = prefix, | ||
| technology = technology | ||
| } | ||
|
|
||
| output { | ||
| File fa = Assemble.canu_contigs_fasta | ||
| File log = Assemble.intermediate_log | ||
| } | ||
| } | ||
|
|
||
| # performs canu correct on raw reads, currently assumes ONT reads | ||
| # performs canu correct on raw reads | ||
| task Correct { | ||
| input { | ||
| File reads | ||
| Int genome_size | ||
| Int corrected_coverage | ||
| Float error_rate | ||
| String prefix | ||
|
|
||
| String technology | ||
| RuntimeAttr? runtime_attr_override | ||
| } | ||
|
|
||
|
|
@@ -66,24 +72,28 @@ task Correct { | |
| genome_size: "estimate on genome size (parameter to canu's 'genomeSize')" | ||
| error_rate: "parameter to canu's 'correctedErrorRate'" | ||
| prefix: "prefix to output files" | ||
| technology: "sequencing technology (nanopore or pacbio)" | ||
| } | ||
|
|
||
| String tech_specific_arg = if technology == 'ont' then "nanopore" else 'pacbio' | ||
| Int disk_size = 150 * ceil(size(reads, "GB")) | ||
|
|
||
| command <<< | ||
| set -euxo pipefail | ||
|
|
||
| canu -correct \ | ||
| canu -correct corOutCoverage=~{corrected_coverage}\ | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. add a space at the end of the line |
||
| -p ~{prefix} -d canu_correct_output \ | ||
| genomeSize=~{genome_size}m \ | ||
| genomeSize=~{genome_size}k \ | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. rename the variable to |
||
| corMaxEvidenceErate=0.15 \ | ||
| correctedErrorRate=~{error_rate} \ | ||
| -nanopore \ | ||
| -~{tech_specific_arg} \ | ||
| ~{reads} | ||
| tree > intermediate.log | ||
| >>> | ||
|
|
||
| output { | ||
| File corrected_reads = "canu_correct_output/~{prefix}.correctedReads.fasta.gz" | ||
| File intermediate_log = "intermediate.log" | ||
| } | ||
|
|
||
| ######################### | ||
|
|
@@ -94,7 +104,7 @@ task Correct { | |
| boot_disk_gb: 10, | ||
| preemptible_tries: 0, | ||
| max_retries: 0, | ||
| docker: "us.gcr.io/broad-dsp-lrma/lr-canu:0.1.0" | ||
| docker: "us.gcr.io/broad-dsp-lrma/lr-canu:0.2.0" | ||
| } | ||
| RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) | ||
| runtime { | ||
|
|
@@ -115,6 +125,7 @@ task Trim { | |
| Int genome_size | ||
| Float error_rate | ||
| String prefix | ||
| String technology | ||
|
|
||
| RuntimeAttr? runtime_attr_override | ||
| } | ||
|
|
@@ -124,23 +135,26 @@ task Trim { | |
| genome_size: "estimate on genome size (parameter to canu's 'genomeSize')" | ||
| corrected_reads: "parameter to canu's 'correctedErrorRate'" | ||
| prefix: "prefix to output files" | ||
| technology: "sequencing technology (nanopore or pacbio)" | ||
| } | ||
|
|
||
| String tech_specific_arg = if technology == 'ont' then "nanopore" else 'pacbio' | ||
| Int disk_size = 50 * ceil(size(corrected_reads, "GB")) | ||
|
|
||
| command <<< | ||
| set -euxo pipefail | ||
|
|
||
| canu -trim \ | ||
| -p ~{prefix} -d canu_trim_output \ | ||
| genomeSize=~{genome_size}m \ | ||
| genomeSize=~{genome_size}k \ | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. ditto |
||
| correctedErrorRate=~{error_rate} \ | ||
| -nanopore-corrected \ | ||
| -~{tech_specific_arg}-corrected \ | ||
| ~{corrected_reads} | ||
| tree > intermediate.log | ||
| >>> | ||
|
|
||
| output { | ||
| File trimmed_reads = "canu_trim_output/~{prefix}.trimmedReads.fasta.gz" | ||
| File intermediate_log = "intermediate.log" | ||
| } | ||
|
|
||
| ######################### | ||
|
|
@@ -151,7 +165,7 @@ task Trim { | |
| boot_disk_gb: 10, | ||
| preemptible_tries: 0, | ||
| max_retries: 0, | ||
| docker: "us.gcr.io/broad-dsp-lrma/lr-canu:0.1.0" | ||
| docker: "us.gcr.io/broad-dsp-lrma/lr-canu:0.2.0" | ||
| } | ||
| RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) | ||
| runtime { | ||
|
|
@@ -172,7 +186,7 @@ task Assemble { | |
| File trimmed_reads | ||
| Float error_rate | ||
| String prefix | ||
|
|
||
| String technology | ||
| RuntimeAttr? runtime_attr_override | ||
| } | ||
|
|
||
|
|
@@ -181,34 +195,38 @@ task Assemble { | |
| genome_size: "estimate on genome size (parameter to canu's 'genomeSize')" | ||
| error_rate: "parameter to canu's 'correctedErrorRate'" | ||
| prefix: "prefix to output files" | ||
| technology: "sequencing technology (nanopore or pacbio)" | ||
| } | ||
|
|
||
| String tech_specific_arg = if technology == 'ont' then "nanopore" else 'pacbio' | ||
| Int disk_size = 50 * ceil(size(trimmed_reads, "GB")) | ||
|
|
||
| command <<< | ||
| set -euxo pipefail | ||
|
|
||
| canu -assemble \ | ||
| -p ~{prefix} -d canu_assemble_output \ | ||
| genomeSize=~{genome_size}m \ | ||
| genomeSize=~{genome_size}k \ | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. ditto |
||
| correctedErrorRate=~{error_rate} \ | ||
| -nanopore-corrected \ | ||
| -~{tech_specific_arg}-corrected \ | ||
| ~{trimmed_reads} | ||
| tree > intermediate.log | ||
| >>> | ||
|
|
||
| output { | ||
| File canu_contigs_fasta = "canu_assemble_output/~{prefix}.contigs.fasta" | ||
| File intermediate_log = "intermediate.log" | ||
| } | ||
|
|
||
| ######################### | ||
| RuntimeAttr default_attr = object { | ||
| cpu_cores: 32, | ||
| mem_gb: 32, | ||
| mem_gb: 60, | ||
| disk_gb: disk_size, | ||
| boot_disk_gb: 10, | ||
| boot_disk_gb: 20, | ||
| preemptible_tries: 0, | ||
| max_retries: 0, | ||
| docker: "us.gcr.io/broad-dsp-lrma/lr-canu:0.1.0" | ||
| docker: "us.gcr.io/broad-dsp-lrma/lr-canu:0.2.0" | ||
| } | ||
| RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) | ||
| runtime { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Need to revert back to the old way.