Skip to content

Commit

Permalink
phylogenetics is now under WITH_MATCH_NORMAL workflow; commented spec…
Browse files Browse the repository at this point in the history
…tra plotting for phylogenetics matrices due to soon-to-be-fixed issue in sigprofiler plotting
  • Loading branch information
Phuong-Le committed Sep 3, 2024
1 parent 3afb8bd commit ce91886
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 57 deletions.
99 changes: 49 additions & 50 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -69,65 +69,64 @@ workflow {
FILTER_WITH_MATCH_NORMAL_INDEL(sample_paths_content_ch, vcfilter_config)
}
}

}

// phylogenetics is independent of whether there's a match normal or not
// indel phylogenetics will use output from snp phylogenetics if both workflows are run
if (params.phylogenetics == true) {
if (params.filter_snp == true) {
// only run this if there are more than 2 samples per donor (genotype_bin only has one column)
phylogenetics_input_ch = FILTER_WITH_MATCH_NORMAL_SNP
.out
.filter { it[3].readLines().first().split(' ').size() > 2 }
PHYLOGENETICS(phylogenetics_input_ch, 'phylogenetics_snp_out') // phylogenetics without tree topology
if (params.filter_indel == true) {
phylogenetics_input_ch = FILTER_WITH_MATCH_NORMAL_INDEL
// Phylogenetics
if (params.phylogenetics == true) {
if (params.filter_snp == true) {
// only run this if there are more than 2 samples per donor (genotype_bin only has one column)
phylogenetics_input_ch = FILTER_WITH_MATCH_NORMAL_SNP
.out
.filter { it[3].readLines().first().split(' ').size() > 2 }
mutToTree_input_ch = PHYLOGENETICS.out.cross(phylogenetics_input_ch)
.map( pdid -> tuple(pdid[0][0], pdid[0][1], pdid[1][1], pdid[1][2], pdid[1][3]) )
PHYLOGENETICS_PROVIDED_TREE_TOPOLOGY(mutToTree_input_ch, 'phylogenetics_indel_out')
PHYLOGENETICS(phylogenetics_input_ch, 'phylogenetics_snp_out') // phylogenetics without tree topology
if (params.filter_indel == true) {
phylogenetics_input_ch = FILTER_WITH_MATCH_NORMAL_INDEL
.out
.filter { it[3].readLines().first().split(' ').size() > 2 }
mutToTree_input_ch = PHYLOGENETICS.out.cross(phylogenetics_input_ch)
.map( pdid -> tuple(pdid[0][0], pdid[0][1], pdid[1][1], pdid[1][2], pdid[1][3]) )
PHYLOGENETICS_PROVIDED_TREE_TOPOLOGY(mutToTree_input_ch, 'phylogenetics_indel_out')
}
}
}
else if (params.filter_indel == true) {
// get topology
sample_paths = new File(params.sample_paths).getText('UTF-8')
topology = Channel.of(sample_paths)
.splitCsv( header: true, sep : '\t' )
.map { row -> tuple( row.pdid, row.topology ) }
.unique()

phylogenetics_input_ch = FILTER_WITH_MATCH_NORMAL_INDEL
.out
.filter { it[3].readLines().first().split(' ').size() > 2 }

mutToTree_input_ch = topology.cross(phylogenetics_input_ch)
.map( pdid -> tuple(pdid[0][0], pdid[0][1], pdid[1][1], pdid[1][2], pdid[1][3]) )

PHYLOGENETICS_PROVIDED_TREE_TOPOLOGY(mutToTree_input_ch, 'phylogenetics_indel_out') // phylogenetics with tree topology
}
else { // phylogenetics pipeline only
assert params.with_topology != null
if (params.with_topology == true) {
// process input sample_paths
outdir_basename = (params.phylogenetics_outdir_basename == "") ? 'phylogenetics_indel_out' : params.phylogenetics_outdir_basename
else if (params.filter_indel == true) {
// get topology
sample_paths = new File(params.sample_paths).getText('UTF-8')
sample_path_content = Channel.of(sample_paths)
topology = Channel.of(sample_paths)
.splitCsv( header: true, sep : '\t' )
.map{ row -> tuple( row.pdid, row.topology, row.nr_path, row.nv_path, row.genotype_bin_path ) }
PHYLOGENETICS_PROVIDED_TREE_TOPOLOGY(sample_path_content, outdir_basename)
.map { row -> tuple( row.pdid, row.topology ) }
.unique()

phylogenetics_input_ch = FILTER_WITH_MATCH_NORMAL_INDEL
.out
.filter { it[3].readLines().first().split(' ').size() > 2 }

mutToTree_input_ch = topology.cross(phylogenetics_input_ch)
.map( pdid -> tuple(pdid[0][0], pdid[0][1], pdid[1][1], pdid[1][2], pdid[1][3]) )

PHYLOGENETICS_PROVIDED_TREE_TOPOLOGY(mutToTree_input_ch, 'phylogenetics_indel_out') // phylogenetics with tree topology
}
else {
// process input sample paths
outdir_basename = (params.phylogenetics_outdir_basename == "") ? 'phylogenetics_snp_out' : params.phylogenetics_outdir_basename
sample_paths = new File(params.sample_paths).getText('UTF-8')
sample_path_content = Channel.of(sample_paths)
.splitCsv( header: true, sep : '\t' )
.map { row -> tuple( row.pdid, row.nr_path, row.nv_path, row.genotype_bin_path ) }
PHYLOGENETICS(sample_path_content, outdir_basename)
else { // phylogenetics pipeline only
assert params.with_topology != null
if (params.with_topology == true) {
// process input sample_paths
outdir_basename = (params.phylogenetics_outdir_basename == "") ? 'phylogenetics_indel_out' : params.phylogenetics_outdir_basename
sample_paths = new File(params.sample_paths).getText('UTF-8')
sample_path_content = Channel.of(sample_paths)
.splitCsv( header: true, sep : '\t' )
.map{ row -> tuple( row.pdid, row.topology, row.nr_path, row.nv_path, row.genotype_bin_path ) }
PHYLOGENETICS_PROVIDED_TREE_TOPOLOGY(sample_path_content, outdir_basename)
}
else {
// process input sample paths
outdir_basename = (params.phylogenetics_outdir_basename == "") ? 'phylogenetics_snp_out' : params.phylogenetics_outdir_basename
sample_paths = new File(params.sample_paths).getText('UTF-8')
sample_path_content = Channel.of(sample_paths)
.splitCsv( header: true, sep : '\t' )
.map { row -> tuple( row.pdid, row.nr_path, row.nv_path, row.genotype_bin_path ) }
PHYLOGENETICS(sample_path_content, outdir_basename)
}
}
}

}

}
6 changes: 3 additions & 3 deletions sanger_lsf.config
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ process {
}
withName: getPhylogeny {
container = 'phuongle2510/lcm_phylogeny:0.1.1'
memory = '100 MB'
memory = { 200.MB * task.attempt }
queue = 'normal'
cpus = 1
}
Expand Down Expand Up @@ -98,12 +98,12 @@ process {
cpus = 1
}
withName: sigprofilerPlotSnpBySamples {
memory = '500 MB'
memory = { 500.MB * task.attempt }
queue = 'normal'
cpus = 1
}
withName: sigprofilerPlotSnpByBranches {
memory = '500 MB'
memory = { 1000.MB * task.attempt }
queue = 'normal'
cpus = 1
}
Expand Down
3 changes: 1 addition & 2 deletions workflows/filter_with_match_normal_indels.nf
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@ workflow FILTER_WITH_MATCH_NORMAL_INDEL {
main:
// setup
mut_type = 'indel'

// TIM BUTLER REFLAG GOES HERE


// FILTER
vcfiltered_ch = pindelFilter(sample_paths_content_ch, vcfilter_config, mut_type)
Expand Down
2 changes: 1 addition & 1 deletion workflows/phylogenetics.nf
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ workflow PHYLOGENETICS { // phylogenetics workflow for SNVs
matrixGeneratorOnBranches(branched_vcf_with_header, outdir_basename)
concatMatrices(matrixGeneratorOnBranches.out.toList(), outdir_basename)
// plotting
sigprofilerPlotSnpByBranches(concatMatrices.out, outdir_basename)
// sigprofilerPlotSnpByBranches(concatMatrices.out, outdir_basename)

emit:
topology
Expand Down
2 changes: 1 addition & 1 deletion workflows/phylogenetics_provided_topology.nf
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,6 @@ workflow PHYLOGENETICS_PROVIDED_TREE_TOPOLOGY { // phylogenetics workflow for IN
matrixGeneratorOnBranches(branched_vcf_with_header, outdir_basename)
concatMatrices(matrixGeneratorOnBranches.out.toList(), outdir_basename)
// plotting
sigprofilerPlotSnpByBranches(concatMatrices.out, outdir_basename)
// sigprofilerPlotSnpByBranches(concatMatrices.out, outdir_basename)

}

0 comments on commit ce91886

Please sign in to comment.