Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions docker/lr-canu/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
FROM openjdk:8

RUN apt-get update && \
apt-get install libgomp1
apt-get install libgomp1 tree

RUN wget https://github.com/marbl/canu/releases/download/v2.0/canu-2.0.Linux-amd64.tar.xz && \
tar -xvf canu-2.0.Linux-amd64.tar.xz

ENV PATH="/canu-2.0/Linux-amd64/bin:${PATH}"
RUN wget https://github.com/marbl/canu/releases/download/v2.2/canu-2.2.Linux-amd64.tar.xz && \
tar -xJf canu-2.2.*.tar.xz


ENV PATH="/canu-2.2/bin:${PATH}"
RUN canu
18 changes: 8 additions & 10 deletions docker/lr-canu/Makefile
Original file line number Diff line number Diff line change
@@ -1,17 +1,15 @@
IMAGE_NAME = lr-canu
VERSION = 0.1.0

TAG1 = us.gcr.io/broad-dsp-lrma/$(IMAGE_NAME):$(VERSION)
TAG2 = us.gcr.io/broad-dsp-lrma/$(IMAGE_NAME):latest
VERSION = 0.2.0

TAG1 = us.gcr.io/broad-dsp-lrma/lr-canu:$(VERSION)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Need to revert to the old way (build the tags from $(IMAGE_NAME) rather than hard-coding lr-canu).

TAG2 = us.gcr.io/broad-dsp-lrma/lr-canu:latest


all: | build push

build:
docker build -t $(TAG1) -t $(TAG2) .

build_no_cache:
docker build --no-cache -t $(TAG1) -t $(TAG2) .
docker build -t $(TAG1) -t $(TAG2) .
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Check tabs vs. spaces here: Makefile recipe lines must be indented with a tab, not spaces.


push:
docker push $(TAG1)
docker push $(TAG2)
docker push $(TAG1)
docker push $(TAG2)
58 changes: 38 additions & 20 deletions wdl/tasks/Canu.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -4,29 +4,31 @@ version 1.0
# A workflow that runs the Canu 3-step assembly (correct, trim, assemble).
# - Tested on a small genome (malaria, ~23 Mb); larger genomes may require some changes,
# including tweaks to the default resource allocation.
# - Currently assumes nanopore reads

##########################################################################################

import "Structs.wdl"

workflow Canu {
input {
File reads

String technology
Int genome_size
Float correct_error_rate
Float trim_error_rate
Float assemble_error_rate

String prefix
Int corrected_coverage
}

call Correct {
input:
reads = reads,
corrected_coverage = corrected_coverage,
genome_size = genome_size,
error_rate = correct_error_rate,
prefix = prefix
prefix = prefix,
technology = technology
}

call Trim {
Expand All @@ -35,6 +37,7 @@ workflow Canu {
corrected_reads = Correct.corrected_reads,
error_rate = trim_error_rate,
prefix = prefix,
technology = technology
}

call Assemble {
Expand All @@ -43,21 +46,24 @@ workflow Canu {
trimmed_reads = Trim.trimmed_reads,
error_rate = assemble_error_rate,
prefix = prefix,
technology = technology
}

output {
File fa = Assemble.canu_contigs_fasta
File log = Assemble.intermediate_log
}
}

# performs canu correct on raw reads, currently assumes ONT reads
# performs canu correct on raw reads
task Correct {
input {
File reads
Int genome_size
Int corrected_coverage
Float error_rate
String prefix

String technology
RuntimeAttr? runtime_attr_override
}

Expand All @@ -66,24 +72,28 @@ task Correct {
genome_size: "estimate on genome size (parameter to canu's 'genomeSize')"
error_rate: "parameter to canu's 'correctedErrorRate'"
prefix: "prefix to output files"
technology: "sequencing technology (nanopore or pacbio)"
}

String tech_specific_arg = if technology == 'ont' then "nanopore" else 'pacbio'
Int disk_size = 150 * ceil(size(reads, "GB"))

command <<<
set -euxo pipefail

canu -correct \
canu -correct corOutCoverage=~{corrected_coverage}\
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add a space at the end of the line (before the trailing backslash).

-p ~{prefix} -d canu_correct_output \
genomeSize=~{genome_size}m \
genomeSize=~{genome_size}k \
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rename the variable to genome_sz_in_kb, since the value is now passed to canu with a `k` (kilobase) suffix.

corMaxEvidenceErate=0.15 \
correctedErrorRate=~{error_rate} \
-nanopore \
-~{tech_specific_arg} \
~{reads}
tree > intermediate.log
>>>

output {
File corrected_reads = "canu_correct_output/~{prefix}.correctedReads.fasta.gz"
File intermediate_log = "intermediate.log"
}

#########################
Expand All @@ -94,7 +104,7 @@ task Correct {
boot_disk_gb: 10,
preemptible_tries: 0,
max_retries: 0,
docker: "us.gcr.io/broad-dsp-lrma/lr-canu:0.1.0"
docker: "us.gcr.io/broad-dsp-lrma/lr-canu:0.2.0"
}
RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr])
runtime {
Expand All @@ -115,6 +125,7 @@ task Trim {
Int genome_size
Float error_rate
String prefix
String technology

RuntimeAttr? runtime_attr_override
}
Expand All @@ -124,23 +135,26 @@ task Trim {
genome_size: "estimate on genome size (parameter to canu's 'genomeSize')"
corrected_reads: "parameter to canu's 'correctedErrorRate'"
prefix: "prefix to output files"
technology: "sequencing technology (nanopore or pacbio)"
}

String tech_specific_arg = if technology == 'ont' then "nanopore" else 'pacbio'
Int disk_size = 50 * ceil(size(corrected_reads, "GB"))

command <<<
set -euxo pipefail

canu -trim \
-p ~{prefix} -d canu_trim_output \
genomeSize=~{genome_size}m \
genomeSize=~{genome_size}k \
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto

correctedErrorRate=~{error_rate} \
-nanopore-corrected \
-~{tech_specific_arg}-corrected \
~{corrected_reads}
tree > intermediate.log
>>>

output {
File trimmed_reads = "canu_trim_output/~{prefix}.trimmedReads.fasta.gz"
File intermediate_log = "intermediate.log"
}

#########################
Expand All @@ -151,7 +165,7 @@ task Trim {
boot_disk_gb: 10,
preemptible_tries: 0,
max_retries: 0,
docker: "us.gcr.io/broad-dsp-lrma/lr-canu:0.1.0"
docker: "us.gcr.io/broad-dsp-lrma/lr-canu:0.2.0"
}
RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr])
runtime {
Expand All @@ -172,7 +186,7 @@ task Assemble {
File trimmed_reads
Float error_rate
String prefix

String technology
RuntimeAttr? runtime_attr_override
}

Expand All @@ -181,34 +195,38 @@ task Assemble {
genome_size: "estimate on genome size (parameter to canu's 'genomeSize')"
error_rate: "parameter to canu's 'correctedErrorRate'"
prefix: "prefix to output files"
technology: "sequencing technology (nanopore or pacbio)"
}

String tech_specific_arg = if technology == 'ont' then "nanopore" else 'pacbio'
Int disk_size = 50 * ceil(size(trimmed_reads, "GB"))

command <<<
set -euxo pipefail

canu -assemble \
-p ~{prefix} -d canu_assemble_output \
genomeSize=~{genome_size}m \
genomeSize=~{genome_size}k \
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto

correctedErrorRate=~{error_rate} \
-nanopore-corrected \
-~{tech_specific_arg}-corrected \
~{trimmed_reads}
tree > intermediate.log
>>>

output {
File canu_contigs_fasta = "canu_assemble_output/~{prefix}.contigs.fasta"
File intermediate_log = "intermediate.log"
}

#########################
RuntimeAttr default_attr = object {
cpu_cores: 32,
mem_gb: 32,
mem_gb: 60,
disk_gb: disk_size,
boot_disk_gb: 10,
boot_disk_gb: 20,
preemptible_tries: 0,
max_retries: 0,
docker: "us.gcr.io/broad-dsp-lrma/lr-canu:0.1.0"
docker: "us.gcr.io/broad-dsp-lrma/lr-canu:0.2.0"
}
RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr])
runtime {
Expand Down