Merge branch 'release/4.4.0'

cancerit · Aug 19, 2020 · 0e99853 · 0e99853
2 parents 7943f3d + a5b50e0
commit 0e99853
Show file tree

Hide file tree

Showing 9 changed files with 389 additions and 44 deletions.
diff --git a/CHANGES.md b/CHANGES.md
@@ -1,5 +1,10 @@
 # Changes
 
+## 4.4.0
+
+* Added ascatCounts to produce counts files
+* Modified ascat wrapper to handle count files as input
+
 ## 4.3.4
 
 * Eliminated redundant logic from setup script

diff --git a/perl/MANIFEST b/perl/MANIFEST
@@ -1,5 +1,6 @@
 bin/ascat.pl
 bin/ascatCnToVCF.pl
+bin/ascatCounts.pl
 bin/ascatFailedCnCsv.pl
 bin/ascatToBigWig.pl
 bin/utilities/ascatFaiChunk.pl
@@ -21,23 +22,24 @@ docs/pod_html/_whtprpk.css
 docs/pod_html/_whtpurk.css
 docs/pod_html/ascat.html
 docs/pod_html/ascatCnToVCF.html
+docs/pod_html/ascatCounts.html
 docs/pod_html/ascatFailedCnCsv.html
 docs/pod_html/ascatToBigWig.html
 docs/pod_html/index.html
 docs/pod_html/Sanger/CGP/Ascat.html
 docs/pod_html/utilities/ascatSnpPanelGenerator.html
-docs/reports_html/blib-lib-Sanger-CGP-Ascat-Implement-pm--branch.html
-docs/reports_html/blib-lib-Sanger-CGP-Ascat-Implement-pm--subroutine.html
-docs/reports_html/blib-lib-Sanger-CGP-Ascat-Implement-pm.html
-docs/reports_html/blib-lib-Sanger-CGP-Ascat-pm--subroutine.html
-docs/reports_html/blib-lib-Sanger-CGP-Ascat-pm.html
 docs/reports_html/common.js
 docs/reports_html/cover.14
 docs/reports_html/cover.css
 docs/reports_html/coverage.html
 docs/reports_html/css.js
 docs/reports_html/digests
 docs/reports_html/index.html
+docs/reports_html/lib-Sanger-CGP-Ascat-Implement-pm--branch.html
+docs/reports_html/lib-Sanger-CGP-Ascat-Implement-pm--subroutine.html
+docs/reports_html/lib-Sanger-CGP-Ascat-Implement-pm.html
+docs/reports_html/lib-Sanger-CGP-Ascat-pm--subroutine.html
+docs/reports_html/lib-Sanger-CGP-Ascat-pm.html
 docs/reports_html/standardista-table-sorting.js
 docs/reports_text/coverage.txt
 lib/Sanger/CGP/Ascat.pm

diff --git a/perl/Makefile.PL b/perl/Makefile.PL
@@ -34,6 +34,7 @@ WriteMakefile(
                         bin/ascatCnToVCF.pl
                         bin/ascatFailedCnCsv.pl
                         bin/ascatToBigWig.pl
+                        bin/ascatCounts.pl
                         bin/utilities/ascatFaiChunk.pl
                         bin/utilities/ascatSnpPanelFromVcfs.pl
                         bin/utilities/ascatSnpPanelGcCorrections.pl

diff --git a/perl/bin/ascat.pl b/perl/bin/ascat.pl
@@ -24,6 +24,7 @@
 BEGIN {
   use Cwd qw(abs_path);
   use File::Basename;
+  use File::Path qw(make_path);
   unshift (@INC,dirname(abs_path($0)).'/../lib');
 };
 
@@ -56,14 +57,19 @@ BEGIN
 
   # register any process that can run in parallel here
   $threads->add_function('allele_count', \&Sanger::CGP::Ascat::Implement::allele_count);
+  $threads->add_function('deploy_counts', \&Sanger::CGP::Ascat::Implement::deploy_counts);
 
   # start processes here (in correct order obviously), add conditions for skipping based on 'process' option
-  if(!exists $options->{'process'} || $options->{'process'} eq 'allele_count') {
+  if( ($options->{'counts_input'} == 0) && (!exists $options->{'process'} || $options->{'process'} eq 'allele_count')) {
     my $jobs = $options->{'lociChrsBySample'};
     $jobs = $options->{'limit'} if(exists $options->{'limit'} && defined $options->{'limit'});
     $threads->run($jobs, 'allele_count', $options);
   }
-
+  if ( $options->{'counts_input'} == 1) {
+    my $ascat_out = File::Spec->catdir(abs_path($options->{'tmp'}),'ascat');
+    make_path($ascat_out) unless(-e $ascat_out);
+    $threads->run(2, 'deploy_counts', $options);    
+  }
   Sanger::CGP::Ascat::Implement::ascat($options) if(!exists $options->{'process'} || $options->{'process'} eq 'ascat');
   if(!exists $options->{'process'} || $options->{'process'} eq 'finalise') {
     Sanger::CGP::Ascat::Implement::finalise($options);
@@ -110,6 +116,8 @@ sub setup {
               'f|force' => \$opts{'force'},
               'nc|noclean' => \$opts{'noclean'},
               'nb|nobigwig' => \$opts{'nobigwig'},
+              'tn|t_name=s' => \$opts{'t_name'},
+              'nn|n_name=s' => \$opts{'n_name'}
   ) or pod2usage(2);
 
   pod2usage(-verbose => 1, -exitval => 0) if(defined $opts{'h'});
@@ -145,6 +153,23 @@ sub setup {
 
   PCAP::Cli::file_for_reading('tumour', $opts{'tumour'});
   PCAP::Cli::file_for_reading('normal', $opts{'normal'});
+
+  #special case of counts files as input
+  $opts{'counts_input'} = 0;
+  if ( ( $opts{'tumour'} =~ /\.count\.gz$/ ) &&  ( $opts{'normal'} =~ /\.count\.gz$/ ) ) {
+    warn qq{NOTE: using counts inputs, skipping allelecount step\n};
+    if ( ( !defined($opts{'t_name'} )) || ( ! defined($opts{'n_name'})) ){
+      pod2usage(-msg  => "\nERROR: Must specify normal & tumour names when using count files as input\n", -verbose => 1,  -output => \*STDERR);
+    }
+    pod2usage(-msg  => "\nERROR: Must specify assembly (-ra ) when using count files as input\n", -verbose => 1,  -output => \*STDERR) unless ( defined( $opts{'assembly'} ) );
+    pod2usage(-msg  => "\nERROR: Must specify species (-rs ) when using count files as input\n", -verbose => 1,  -output => \*STDERR) unless ( defined( $opts{'species'} ) );
+    pod2usage(-msg  => "\nERROR: Must specigy platform (-pl ) when using count files as input\n", -verbose => 1,  -output => \*STDERR) unless ( defined( $opts{'platform'} ) );
+    pod2usage(-msg  => "\nERROR: Must specify genderChr when using count files as input\n", -verbose => 1,  -output => \*STDERR) unless ( defined( $opts{'genderChr'} ) );
+    $opts{'counts_input'} = 1;
+  }
+  if ( !( $opts{'tumour'} =~ /\.count\.gz$/ ) !=  !( $opts{'normal'} =~ /\.count\.gz$/ ) ) {
+    pod2usage(-msg  => "\nERROR: Both tumour and normal need to be count files.\n", -verbose => 1,  -output => \*STDERR);
+  }
   PCAP::Cli::file_for_reading('snp_gc', $opts{'snp_gc'});
   PCAP::Cli::file_for_reading('reference', $opts{'reference'});
   PCAP::Cli::out_dir_check('outdir', $opts{'outdir'});
@@ -244,8 +269,8 @@ =head1 SYNOPSIS
   Required parameters
 
     -outdir       -o    Folder to output result to.
-    -tumour       -t    Tumour BAM/CRAM file
-    -normal       -n    Normal BAM/CRAM file
+    -tumour       -t    Tumour BAM/CRAM/counts file
+    -normal       -n    Normal BAM/CRAM/counts file
     -reference    -r    Reference fasta
     -snp_gc       -sg   Snp GC correction file
     -protocol     -pr   Sequencing protocol (e.g. WGS, WXS)
@@ -279,6 +304,8 @@ =head1 SYNOPSIS
     -noclean      -nc   Finalise results but don't clean up the tmp directory.
                         - Useful when including a manual check and restarting ascat with new pu and pi params.
     -nobigwig     -nb   Don't generate BigWig files.
+    -t_name       -tn   Tumour name to use when using count files as input
+    -n_name       -nn   Normal name to use when using count files as input
 
   Other
     -help         -h    Brief help message

diff --git a/perl/bin/ascatCnToVCF.pl b/perl/bin/ascatCnToVCF.pl
@@ -37,7 +37,7 @@ BEGIN
 use Bio::DB::HTS;
 use Try::Tiny;
 use PCAP::Cli;
-
+use Carp;
 
 use Sanger::CGP::Vcf;
 use Sanger::CGP::Vcf::VCFCNConverter;
@@ -50,19 +50,65 @@ BEGIN
 
 {
   my $opts = setup();
-
-  my $mt_sam = Bio::DB::HTS->new(-bam => $opts->{'sbm'}, -fasta => $opts->{'r'});
-  my $wt_sam = Bio::DB::HTS->new(-bam => $opts->{'sbw'}, -fasta => $opts->{'r'});
-
-  #parse samples and contigs from the bam files.
-  my $contigs = Sanger::CGP::Vcf::BamUtil->parse_contigs($mt_sam->header->text.$wt_sam->header->text,$opts->{'rs'},$opts->{'ra'});
-  my $mt_samples = Sanger::CGP::Vcf::BamUtil->parse_samples($mt_sam->header->text,$opts->{'mss'},$opts->{'msq'},$opts->{'msa'},$opts->{'msc'},$opts->{'msd'},$opts->{'msp'});
-  my $wt_samples = Sanger::CGP::Vcf::BamUtil->parse_samples($wt_sam->header->text,$opts->{'wss'},$opts->{'wsq'},$opts->{'wsa'},$opts->{'wsc'},$opts->{'wsd'},$opts->{'wsp'});
-
-  # close files we're finished with
-  undef $mt_sam;
-  undef $wt_sam;
-
+
+  my $contigs;
+  my $mt_samples;
+  my $wt_samples;
+
+  #If tumour and normal names are provided the BAM files are not required;
+  #the sample objects are built here and contigs are read from the reference .fai index
+  if ( defined $opts->{'tn'} and defined $opts->{'nn'} ) { 
+    $mt_samples->{ $opts->{'tn'} } = new Sanger::CGP::Vcf::Sample(
+        -name => $opts->{'tn'} ,
+        -study => undef,
+        -platform => $opts->{'msq'},
+        -seq_protocol => undef,
+        -accession => undef,
+        -accession_source => undef,
+        -description => undef
+      );
+    $wt_samples->{ $opts->{'nn'} } = new Sanger::CGP::Vcf::Sample(
+        -name => $opts->{'nn'} ,
+        -study => undef,
+        -platform => $opts->{'wsq'},
+        -seq_protocol => undef,
+        -accession => undef,
+        -accession_source => undef,
+        -description => undef
+      );
+
+    my $fai = $opts->{'r'}.'.fai';
+    open(my $FAI, $fai ) or die("\nERROR: Couldn't open $fai index file\n");
+    while(<$FAI>){
+      my ($name,$length) = split /\t/;
+      my $contig = new Sanger::CGP::Vcf::Contig(
+        -name => $name,
+        -length => $length,
+        -assembly => $opts->{'ra'},
+        -species => $opts->{'rs'}
+      );
+      if(exists $contigs->{$name}){
+      	croak "ERROR: Trying to merge contigs with conflicting data:\n".Dumper($contigs->{$name})."\n".Dumper($contig)
+          unless $contig->compare($contigs->{$name});
+      } else {
+      	$contigs->{$name} = $contig;
+      }
+    }
+  }
+  #BAM input
+  else {
+    my $mt_sam = Bio::DB::HTS->new(-bam => $opts->{'sbm'}, -fasta => $opts->{'r'});
+    my $wt_sam = Bio::DB::HTS->new(-bam => $opts->{'sbw'}, -fasta => $opts->{'r'});
+
+    #parse samples and contigs from the bam files.
+    $contigs = Sanger::CGP::Vcf::BamUtil->parse_contigs($mt_sam->header->text.$wt_sam->header->text,$opts->{'rs'},$opts->{'ra'});
+    $mt_samples = Sanger::CGP::Vcf::BamUtil->parse_samples($mt_sam->header->text,$opts->{'mss'},$opts->{'msq'},$opts->{'msa'},$opts->{'msc'},$opts->{'msd'},$opts->{'msp'});
+    $wt_samples = Sanger::CGP::Vcf::BamUtil->parse_samples($wt_sam->header->text,$opts->{'wss'},$opts->{'wsq'},$opts->{'wsa'},$opts->{'wsc'},$opts->{'wsd'},$opts->{'wsp'});
+
+    # close files we're finished with
+    undef $mt_sam;
+    undef $wt_sam;
+  }
   die "No samples found in normal bam file." if(scalar values %$wt_samples == 0);
   die "Multiple samples found in normal bam file." if(scalar values %$wt_samples > 1);
   die "No samples found in mutant bam file." if(scalar values %$mt_samples == 0);
@@ -164,6 +210,8 @@ sub setup{
           'rs|reference-species=s' => \$opts{'rs'},
           'ra|reference-assembly=s' => \$opts{'ra'},
           'r|reference=s' => \$opts{'r'},
+          'tn|tumour_name=s' => \$opts{'tn'},
+          'nn|normal_name=s' => \$opts{'nn'},
           '<>' => sub{push(@random_args,shift(@_));}
   ) or pod2usage(2);
 
@@ -175,18 +223,27 @@ sub setup{
   pod2usage(-verbose => 1) if(defined $opts{'h'});
   pod2usage(-verbose => 2) if(defined $opts{'m'});
 
-
   if($opts{'i'}){
     # can come from STDIN if not defined
     PCAP::Cli::file_for_reading('i', $opts{'i'});
   }
-  PCAP::Cli::file_for_reading('sbm', $opts{'sbm'});
-  PCAP::Cli::file_for_reading('sbw', $opts{'sbw'});
-  PCAP::Cli::file_for_reading('r', $opts{'r'});
 
   pod2usage(-message  => "\nERROR: msq|sample-sequencing-protocol-mut must be defined.\n", -verbose => 1,  -output => \*STDERR) if(exists $opts{'msq'} && ! defined $opts{'msq'});
   pod2usage(-message  => "\nERROR: wsq|sample-sequencing-protocol-norm must be defined.\n", -verbose => 1,  -output => \*STDERR) if(exists $opts{'wsq'} && ! defined $opts{'wsq'});
 
+  PCAP::Cli::file_for_reading('r', $opts{'r'});
+
+  if ( defined $opts{'tn'} or defined $opts{'nn'} ){
+    pod2usage(-message  => "\nERROR: When using sample name arguments both tumour and normal must be defined\n", -verbose => 1,  -output => \*STDERR) if( !( defined $opts{'tn'} && defined $opts{'nn'}) );
+    pod2usage(-message  => "\nERROR: When using sample name arguments ref. assembly must be specified (-ra)\n", -verbose => 1,  -output => \*STDERR) if( !( defined $opts{'ra'} && defined $opts{'ra'}) );
+    pod2usage(-message  => "\nERROR: When using sample name arguments ref. species must be specified (-rs)\n", -verbose => 1,  -output => \*STDERR) if( !( defined $opts{'rs'} && defined $opts{'rs'}) );
+    pod2usage(-message  => "\nERROR: When using sample name arguments sequencing platform must be specified (-wsp & -msp)\n", -verbose => 1,  -output => \*STDERR) if( !( defined $opts{'wsp'} && defined $opts{'wsp'})  and !( defined $opts{'msp'} && defined $opts{'msp'}) );
+    return \%opts;
+  } 
+
+  PCAP::Cli::file_for_reading('sbm', $opts{'sbm'});
+  PCAP::Cli::file_for_reading('sbw', $opts{'sbw'});
+
   return \%opts;
 }
 
@@ -222,7 +279,11 @@ =head1 SYNOPSIS
       -sample-accession-source-norm    -wsc  Normal sample accession source.
       -seq-platform-norm               -wsp  Normal sequencing platform [BAM HEADER].
 
+ 
     Other:
+     -tumour_name    -tn  Tumour sample name. For processing count file results
+     -normal_name    -nn  Normal sample name. For processing count file results
+
      -help     -h   Brief help message.
      -man      -m   Full documentation.
      -version  -v   Version information.