phylogenetics is now under WITH_MATCH_NORMAL workflow; commented out spectra plotting for phylogenetics matrices due to soon-to-be-fixed issue in sigprofiler plotting
Phuong-Le · Sep 3, 2024 · ce91886
1 parent 3afb8bd
commit ce91886
Show file tree

Hide file tree

Showing 5 changed files with 55 additions and 57 deletions.
diff --git a/main.nf b/main.nf
@@ -69,65 +69,64 @@ workflow {
                 FILTER_WITH_MATCH_NORMAL_INDEL(sample_paths_content_ch, vcfilter_config)
             }
         }
-
-    }
 
-    // phylogenetics is independent of whether there's a match normal or not
-    // indel phylogenetics will use output from snp phylogenetics if both workflows are run
-    if (params.phylogenetics == true) {
-        if (params.filter_snp == true) {
-            // only run this if there are more than 2 sample per donor (genotype_bin only has one column)
-            phylogenetics_input_ch = FILTER_WITH_MATCH_NORMAL_SNP
-                .out
-                .filter { it[3].readLines().first().split(' ').size() > 2 }
-            PHYLOGENETICS(phylogenetics_input_ch, 'phylogenetics_snp_out') // phylogenetics without tree topology
-            if (params.filter_indel == true) {
-                phylogenetics_input_ch = FILTER_WITH_MATCH_NORMAL_INDEL
+        // Phylogenetics
+        if (params.phylogenetics == true) {
+            if (params.filter_snp == true) {
+                // only run this if there are more than 2 sample per donor (genotype_bin only has one column)
+                phylogenetics_input_ch = FILTER_WITH_MATCH_NORMAL_SNP
                     .out
                     .filter { it[3].readLines().first().split(' ').size() > 2 }
-                mutToTree_input_ch = PHYLOGENETICS.out.cross(phylogenetics_input_ch)
-                    .map( pdid -> tuple(pdid[0][0], pdid[0][1], pdid[1][1], pdid[1][2], pdid[1][3]) )
-                PHYLOGENETICS_PROVIDED_TREE_TOPOLOGY(mutToTree_input_ch, 'phylogenetics_indel_out')
+                PHYLOGENETICS(phylogenetics_input_ch, 'phylogenetics_snp_out') // phylogenetics without tree topology
+                if (params.filter_indel == true) {
+                    phylogenetics_input_ch = FILTER_WITH_MATCH_NORMAL_INDEL
+                        .out
+                        .filter { it[3].readLines().first().split(' ').size() > 2 }
+                    mutToTree_input_ch = PHYLOGENETICS.out.cross(phylogenetics_input_ch)
+                        .map( pdid -> tuple(pdid[0][0], pdid[0][1], pdid[1][1], pdid[1][2], pdid[1][3]) )
+                    PHYLOGENETICS_PROVIDED_TREE_TOPOLOGY(mutToTree_input_ch, 'phylogenetics_indel_out')
+                }
             }
-        }
-        else if (params.filter_indel == true) {
-            // get topology
-            sample_paths = new File(params.sample_paths).getText('UTF-8')
-            topology = Channel.of(sample_paths)
-                .splitCsv( header: true, sep : '\t' )
-                .map { row -> tuple( row.pdid, row.topology ) }
-                .unique()
-
-            phylogenetics_input_ch = FILTER_WITH_MATCH_NORMAL_INDEL
-                    .out
-                    .filter { it[3].readLines().first().split(' ').size() > 2 }
-
-            mutToTree_input_ch = topology.cross(phylogenetics_input_ch)
-                    .map( pdid -> tuple(pdid[0][0], pdid[0][1], pdid[1][1], pdid[1][2], pdid[1][3]) )
-
-            PHYLOGENETICS_PROVIDED_TREE_TOPOLOGY(mutToTree_input_ch, 'phylogenetics_indel_out') // phylogenetics with tree topology
-        }
-        else { // phylogenetics pipeline only 
-            assert params.with_topology != null 
-            if (params.with_topology == true) {
-                // process input sample_paths
-                outdir_basename = (params.phylogenetics_outdir_basename == "") ? 'phylogenetics_indel_out' : params.phylogenetics_outdir_basename
+            else if (params.filter_indel == true) {
+                // get topology
                 sample_paths = new File(params.sample_paths).getText('UTF-8')
-                sample_path_content = Channel.of(sample_paths)
+                topology = Channel.of(sample_paths)
                     .splitCsv( header: true, sep : '\t' )
-                    .map{ row -> tuple( row.pdid, row.topology, row.nr_path, row.nv_path, row.genotype_bin_path ) }
-                PHYLOGENETICS_PROVIDED_TREE_TOPOLOGY(sample_path_content, outdir_basename)
+                    .map { row -> tuple( row.pdid, row.topology ) }
+                    .unique()
+
+                phylogenetics_input_ch = FILTER_WITH_MATCH_NORMAL_INDEL
+                        .out
+                        .filter { it[3].readLines().first().split(' ').size() > 2 }
+
+                mutToTree_input_ch = topology.cross(phylogenetics_input_ch)
+                        .map( pdid -> tuple(pdid[0][0], pdid[0][1], pdid[1][1], pdid[1][2], pdid[1][3]) )
+
+                PHYLOGENETICS_PROVIDED_TREE_TOPOLOGY(mutToTree_input_ch, 'phylogenetics_indel_out') // phylogenetics with tree topology
             }
-            else {
-                // process input sample paths 
-                outdir_basename = (params.phylogenetics_outdir_basename == "") ? 'phylogenetics_snp_out' : params.phylogenetics_outdir_basename
-                sample_paths = new File(params.sample_paths).getText('UTF-8')
-                sample_path_content = Channel.of(sample_paths)
-                    .splitCsv( header: true, sep : '\t' )
-                    .map { row -> tuple( row.pdid, row.nr_path, row.nv_path, row.genotype_bin_path ) }
-                PHYLOGENETICS(sample_path_content, outdir_basename)
+            else { // phylogenetics pipeline only 
+                assert params.with_topology != null 
+                if (params.with_topology == true) {
+                    // process input sample_paths
+                    outdir_basename = (params.phylogenetics_outdir_basename == "") ? 'phylogenetics_indel_out' : params.phylogenetics_outdir_basename
+                    sample_paths = new File(params.sample_paths).getText('UTF-8')
+                    sample_path_content = Channel.of(sample_paths)
+                        .splitCsv( header: true, sep : '\t' )
+                        .map{ row -> tuple( row.pdid, row.topology, row.nr_path, row.nv_path, row.genotype_bin_path ) }
+                    PHYLOGENETICS_PROVIDED_TREE_TOPOLOGY(sample_path_content, outdir_basename)
+                }
+                else {
+                    // process input sample paths 
+                    outdir_basename = (params.phylogenetics_outdir_basename == "") ? 'phylogenetics_snp_out' : params.phylogenetics_outdir_basename
+                    sample_paths = new File(params.sample_paths).getText('UTF-8')
+                    sample_path_content = Channel.of(sample_paths)
+                        .splitCsv( header: true, sep : '\t' )
+                        .map { row -> tuple( row.pdid, row.nr_path, row.nv_path, row.genotype_bin_path ) }
+                    PHYLOGENETICS(sample_path_content, outdir_basename)
+                }
             }
         }
+
     }
 
 }
diff --git a/sanger_lsf.config b/sanger_lsf.config
@@ -70,7 +70,7 @@ process {
     }
     withName: getPhylogeny {
         container = 'phuongle2510/lcm_phylogeny:0.1.1' 
-        memory = '100 MB'
+        memory = { 200.MB * task.attempt }
         queue = 'normal'
         cpus = 1
     }
@@ -98,12 +98,12 @@ process {
         cpus = 1
     }
     withName: sigprofilerPlotSnpBySamples {
-        memory = '500 MB'
+        memory = { 500.MB * task.attempt }
         queue = 'normal'
         cpus = 1
     }
     withName: sigprofilerPlotSnpByBranches {
-        memory = '500 MB'
+        memory = { 1000.MB * task.attempt }
         queue = 'normal'
         cpus = 1
     }

diff --git a/workflows/filter_with_match_normal_indels.nf b/workflows/filter_with_match_normal_indels.nf
@@ -15,8 +15,7 @@ workflow FILTER_WITH_MATCH_NORMAL_INDEL {
     main:
     // setup
     mut_type = 'indel'
-
-    // TIM BUTLER REFLAG GOES HERE
+
 
     // FILTER
     vcfiltered_ch = pindelFilter(sample_paths_content_ch, vcfilter_config, mut_type)

diff --git a/workflows/phylogenetics.nf b/workflows/phylogenetics.nf
@@ -16,7 +16,7 @@ workflow PHYLOGENETICS { // phylogenetics workflow for SNVs
     matrixGeneratorOnBranches(branched_vcf_with_header, outdir_basename)
     concatMatrices(matrixGeneratorOnBranches.out.toList(), outdir_basename)
     // plotting
-    sigprofilerPlotSnpByBranches(concatMatrices.out, outdir_basename)
+    // sigprofilerPlotSnpByBranches(concatMatrices.out, outdir_basename)
 
     emit:
     topology

diff --git a/workflows/phylogenetics_provided_topology.nf b/workflows/phylogenetics_provided_topology.nf
@@ -16,6 +16,6 @@ workflow PHYLOGENETICS_PROVIDED_TREE_TOPOLOGY { // phylogenetics workflow for IN
     matrixGeneratorOnBranches(branched_vcf_with_header, outdir_basename)
     concatMatrices(matrixGeneratorOnBranches.out.toList(), outdir_basename)
     // plotting
-    sigprofilerPlotSnpByBranches(concatMatrices.out, outdir_basename)
+    // sigprofilerPlotSnpByBranches(concatMatrices.out, outdir_basename)
 
 }