Skip to content

Commit

Permalink
Update DiffBind to 3.12.0 and add some requested features (galaxyproj…
Browse files Browse the repository at this point in the history
…ect#6125)

* Update DiffBind to 3.12.0 and add some requested features

* update R script

* add minOverlap to dba.peakset

* Update tools/diffbind/diffbind.R

Co-authored-by: mtekman <[email protected]>

* Update tools/diffbind/diffbind.R

Co-authored-by: mtekman <[email protected]>

* Update tools/diffbind/diffbind.xml

Co-authored-by: mtekman <[email protected]>

* put symlink back

---------

Co-authored-by: Björn Grüning <[email protected]>
Co-authored-by: mtekman <[email protected]>
  • Loading branch information
3 people authored Jul 8, 2024
1 parent 8bdd212 commit fd148a1
Show file tree
Hide file tree
Showing 14 changed files with 189 additions and 1,440 deletions.
33 changes: 24 additions & 9 deletions tools/diffbind/diffbind.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,13 @@ args <- commandArgs(trailingOnly = TRUE)
spec <- matrix(c(
"infile", "i", 1, "character",
"outfile", "o", 1, "character",
"method", "m", 1, "character",
"scorecol", "n", 1, "integer",
"lowerbetter", "l", 1, "logical",
"summits", "s", 1, "integer",
"th", "t", 1, "double",
"minoverlap", "O", 1, "integer",
"use_blacklist", "B", 0, "logical",
"format", "f", 1, "character",
"plots", "p", 2, "character",
"bmatrix", "b", 0, "logical",
Expand Down Expand Up @@ -73,7 +76,11 @@ if (length(ctrls) != 0) {
)
}

sample <- dba(sampleSheet = sample_table, peakFormat = "bed", scoreCol = opt$scorecol, bLowerScoreBetter = opt$lowerbetter)
sample <- dba(sampleSheet = sample_table, peakFormat = "bed", scoreCol = opt$scorecol, bLowerScoreBetter = opt$lowerbetter, minOverlap = opt$minoverlap)

if (!is.null(opt$use_blacklist)) {
sample <- dba.blacklist(sample, blacklist = TRUE)
}

if (!is.null(opt$summits)) {
sample_count <- dba.count(sample, summits = opt$summits)
Expand All @@ -82,17 +89,25 @@ if (!is.null(opt$summits)) {
}

sample_contrast <- dba.contrast(sample_count, categories = DBA_CONDITION, minMembers = 2)
sample_analyze <- dba.analyze(sample_contrast)
diff_bind <- dba.report(sample_analyze, th = opt$th)

if (opt$method == "DBA_DESEQ2") {
method <- DBA_DESEQ2
} else if (opt$method == "DBA_EDGER") {
method <- DBA_EDGER
}

sample_analyze <- dba.analyze(sample_contrast, method = method, bBlacklist = FALSE, bGreylist = FALSE)

diff_bind <- dba.report(sample_analyze, th = opt$th, method = method)

# Generate plots
if (!is.null(opt$plots)) {
pdf(opt$plots)
orvals <- dba.plotHeatmap(sample_analyze, contrast = 1, correlations = FALSE, cexCol = 0.8, th = opt$th)
dba.plotPCA(sample_analyze, contrast = 1, th = opt$th, label = DBA_TISSUE, labelSize = 0.3)
dba.plotMA(sample_analyze, th = opt$th)
dba.plotVolcano(sample_analyze, th = opt$th)
dba.plotBox(sample_analyze, th = opt$th)
orvals <- dba.plotHeatmap(sample_analyze, contrast = 1, correlations = FALSE, cexCol = 0.8, th = opt$th, method = method)
dba.plotPCA(sample_analyze, contrast = 1, th = opt$th, label = DBA_TISSUE, labelSize = 0.3, method = method)
dba.plotMA(sample_analyze, th = opt$th, method = method)
dba.plotVolcano(sample_analyze, th = opt$th, method = method)
dba.plotBox(sample_analyze, th = opt$th, method = method)
dev.off()
}

Expand Down Expand Up @@ -140,7 +155,7 @@ write.table(res_sorted, file = opt$outfile, sep = "\t", quote = FALSE, row.names

# Output binding affinity scores
if (!is.null(opt$bmatrix)) {
bmat <- dba.peakset(sample_count, bRetrieve = TRUE, DataType = DBA_DATA_FRAME)
bmat <- dba.peakset(sample_count, bRetrieve = TRUE, DataType = DBA_DATA_FRAME, minOverlap = opt$minoverlap)
# Output as 0-based tabular
bmat <- data.frame(
Chrom = bmat[, 1],
Expand Down
98 changes: 82 additions & 16 deletions tools/diffbind/diffbind.xml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
<tool id="diffbind" name="DiffBind" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
<description> differential binding analysis of ChIP-Seq peak data</description>
<macros>
<token name="@TOOL_VERSION@">2.10.0</token>
<token name="@VERSION_SUFFIX@">1</token>
<token name="@TOOL_VERSION@">3.12.0</token>
<token name="@VERSION_SUFFIX@">0</token>
<token name="@PROFILE@">22.05</token>
</macros>
<xrefs>
Expand All @@ -11,9 +11,8 @@
</xrefs>
<requirements>
<requirement type="package" version="@TOOL_VERSION@">bioconductor-diffbind</requirement>
<requirement type="package" version="3.5.1">r-base</requirement>
<requirement type="package" version="1.20.3">r-getopt</requirement>
<requirement type="package" version="0.2.20">r-rjson</requirement>
<requirement type="package" version="1.20.4">r-getopt</requirement>
<requirement type="package" version="4.0.16">bioconductor-edger</requirement>
</requirements>
<stdio>
<regex match="Execution halted"
Expand Down Expand Up @@ -71,7 +70,7 @@ echo $(R --version | grep version | grep -v GNU)", DiffBind version" $(R --vanil
#for $ctrl in $g.bamcontrol:
#set $ctrl_name = re.sub('[^\w\-]', '_', str($ctrl.element_identifier))
#set $ctrl_file = $ctrl_name + "-bamcontrol.bam"
#set ctrl_index = $ctrl_name + "-bamcontrol.bai"
#set $ctrl_index = $ctrl_name + "-bamcontrol.bai"
#if $ctrl_file not in json.dumps($temp_factor):
ln -s '${ctrl}' '${ctrl_file}' &&
ln -s '${ctrl.metadata.bam_index}' '${ctrl_index}' &&
Expand All @@ -94,13 +93,15 @@ Rscript '$__tool_directory__/diffbind.R'
-i '#echo json.dumps(temp_factor_names)#'
-o '$outfile'
-m '$method'
-t $th
-f $out.format
-p '$plots'
#if $scorecol:
-n "$scorecol"
-O $minoverlap
#if $use_blacklist:
-B
#end if
-n $scorecol
#if $lowerbetter:
-l "$lowerbetter"
#end if
Expand Down Expand Up @@ -138,8 +139,17 @@ Rscript '$__tool_directory__/diffbind.R'
<param name="bamreads" type="data" format="bam" multiple="true" label="Read BAM files" help="Specify the Read BAM files used in the Peak calling. The input order of the BAM files for the samples MUST match the input order of the peaks files."/>
<param name="bamcontrol" type="data" format="bam" multiple="true" optional="True" label="Control BAM files" help="If specifying a control BAM file, all samples are required to specify one, see Help section below. The input order of the BAM files for the samples MUST match the input order of the peaks files."/>
</repeat>

<param name="scorecol" type="integer" min="0" value="8" label="Score Column" help="Column in peak files that contains peak scores. Default: 8 (narrowPeak)">
<param name="method" type="select" label="Underlying method by which to analyze differential binding affinity">
<option value="DBA_DESEQ2" selected="True">DESeq2</option>
<option value="DBA_EDGER">edgeR</option>
</param>
<param name="use_blacklist" type="boolean" truevalue="True" falsevalue="" checked="False" label="Filters peak intervals that overlap a blacklist from ENCODE" help="Works with human, mouse, worm and fly. Assembly version is determined from the BAM files." />
<param name="minoverlap" type="integer" min="1" value="2" label="Only include peaks in at least this many peaksets in the main binding matrix">
<sanitizer>
<valid initial="string.digits"/>
</sanitizer>
</param>
<param name="scorecol" type="integer" min="0" value="5" label="Score Column" help="Column in peak files that contains peak scores. Default: 5 (narrowPeak)">
<sanitizer>
<valid initial="string.digits"/>
</sanitizer>
Expand Down Expand Up @@ -226,6 +236,28 @@ Rscript '$__tool_directory__/diffbind.R'
</assert_contents>
</output>
</test>
<!-- Ensure EDGER works -->
<test expect_num_outputs="3">
<repeat name="rep_group">
<param name="groupName" value="Resistant"/>
<param name="peaks" ftype="bed" value="BT474_ER_1.bed.gz,BT474_ER_2.bed.gz"/>
<param name="bamreads" ftype="bam" value="BT474_ER_1.bam,BT474_ER_2.bam" />
</repeat>
<repeat name="rep_group">
<param name="groupName" value="Responsive"/>
<param name="peaks" ftype="bed" value="MCF7_ER_1.bed.gz,MCF7_ER_2.bed.gz"/>
<param name="bamreads" ftype="bam" value="MCF7_ER_1.bam,MCF7_ER_2.bam" />
</repeat>
<param name="scorecol" value="5" />
<param name="method" value="DBA_EDGER" />
<param name="format" value="interval"/>
<param name="pdf" value="True" />
<param name="binding_matrix" value="True" />
<param name="rscript" value="False"/>
<output name="outfile" ftype="interval" value="out_diffbind_edger.interval" />
<output name="plots" value="out_plots_edger.pdf" compare="sim_size" />
<output name="binding_matrix" value="out_binding_matrix_edger.tab" />
</test>
<!-- Ensure control BAMs input works -->
<test expect_num_outputs="1">
<repeat name="rep_group">
Expand Down Expand Up @@ -276,6 +308,40 @@ Rscript '$__tool_directory__/diffbind.R'
<param name="format" value="tabular"/>
<output name="outfile" ftype="tabular" file="out_diffbind.tab" />
</test>
<!-- Ensure minoverlap works -->
<test expect_num_outputs="1">
<repeat name="rep_group">
<param name="groupName" value="Resistant"/>
<param name="peaks" ftype="bed" value="BT474_ER_1.bed.gz,BT474_ER_2.bed.gz"/>
<param name="bamreads" ftype="bam" value="BT474_ER_1.bam,BT474_ER_2.bam" />
</repeat>
<repeat name="rep_group">
<param name="groupName" value="Responsive"/>
<param name="peaks" ftype="bed" value="MCF7_ER_1.bed.gz,MCF7_ER_2.bed.gz"/>
<param name="bamreads" ftype="bam" value="MCF7_ER_1.bam,MCF7_ER_2.bam" />
</repeat>
<param name="minoverlap" value="1" />
<param name="scorecol" value="5" />
<param name="format" value="tabular"/>
<output name="outfile" ftype="tabular" file="out_diffbind_minoverlap1.tab" />
</test>
<!-- Ensure blacklist filtering works -->
<test expect_num_outputs="1">
<repeat name="rep_group">
<param name="groupName" value="Resistant"/>
<param name="peaks" ftype="bed" value="BT474_ER_1.bed.gz,BT474_ER_2.bed.gz"/>
<param name="bamreads" ftype="bam" value="BT474_ER_1.bam,BT474_ER_2.bam" />
</repeat>
<repeat name="rep_group">
<param name="groupName" value="Responsive"/>
<param name="peaks" ftype="bed" value="MCF7_ER_1.bed.gz,MCF7_ER_2.bed.gz"/>
<param name="bamreads" ftype="bam" value="MCF7_ER_1.bam,MCF7_ER_2.bam" />
</repeat>
<param name="use_blacklist" value="True"/>
<param name="scorecol" value="5" />
<param name="format" value="tabular"/>
<output name="outfile" ftype="tabular" file="out_diffbind_blacklist.tab" />
</test>
</tests>
<help><![CDATA[
Expand Down Expand Up @@ -420,11 +486,11 @@ Example - **BED format**:
===== ====== ====== ======== ===== ======
Chrom Start End Name Score Strand
===== ====== ====== ======== ===== ======
chr18 394599 396513 DiffBind 0 \.
chr18 111566 112005 DiffBind 0 \.
chr18 346463 347342 DiffBind 0 \.
chr18 399013 400382 DiffBind 0 \.
chr18 371109 372102 DiffBind 0 \.
chr18 394599 396513 DiffBind 0 \.
chr18 111566 112005 DiffBind 0 \.
chr18 346463 347342 DiffBind 0 \.
chr18 399013 400382 DiffBind 0 \.
chr18 371109 372102 DiffBind 0 \.
===== ====== ====== ======== ===== ======
Example - **Tabular format**:
Expand Down
Binary file modified tools/diffbind/test-data/DiffBind_analysis.RData
Binary file not shown.
Loading

0 comments on commit fd148a1

Please sign in to comment.