diff --git a/CHANGES.md b/CHANGES.md index 1547ad0..3e2bb63 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,10 @@ # Changes +## 4.4.0 + +* Added ascatCounts to produce counts files +* Modified ascat wrapper to handle count files as input + ## 4.3.4 * Eliminated redundant logic from setup script diff --git a/perl/MANIFEST b/perl/MANIFEST index 2e6291b..e8b5e0f 100644 --- a/perl/MANIFEST +++ b/perl/MANIFEST @@ -1,5 +1,6 @@ bin/ascat.pl bin/ascatCnToVCF.pl +bin/ascatCounts.pl bin/ascatFailedCnCsv.pl bin/ascatToBigWig.pl bin/utilities/ascatFaiChunk.pl @@ -21,16 +22,12 @@ docs/pod_html/_whtprpk.css docs/pod_html/_whtpurk.css docs/pod_html/ascat.html docs/pod_html/ascatCnToVCF.html +docs/pod_html/ascatCounts.html docs/pod_html/ascatFailedCnCsv.html docs/pod_html/ascatToBigWig.html docs/pod_html/index.html docs/pod_html/Sanger/CGP/Ascat.html docs/pod_html/utilities/ascatSnpPanelGenerator.html -docs/reports_html/blib-lib-Sanger-CGP-Ascat-Implement-pm--branch.html -docs/reports_html/blib-lib-Sanger-CGP-Ascat-Implement-pm--subroutine.html -docs/reports_html/blib-lib-Sanger-CGP-Ascat-Implement-pm.html -docs/reports_html/blib-lib-Sanger-CGP-Ascat-pm--subroutine.html -docs/reports_html/blib-lib-Sanger-CGP-Ascat-pm.html docs/reports_html/common.js docs/reports_html/cover.14 docs/reports_html/cover.css @@ -38,6 +35,11 @@ docs/reports_html/coverage.html docs/reports_html/css.js docs/reports_html/digests docs/reports_html/index.html +docs/reports_html/lib-Sanger-CGP-Ascat-Implement-pm--branch.html +docs/reports_html/lib-Sanger-CGP-Ascat-Implement-pm--subroutine.html +docs/reports_html/lib-Sanger-CGP-Ascat-Implement-pm.html +docs/reports_html/lib-Sanger-CGP-Ascat-pm--subroutine.html +docs/reports_html/lib-Sanger-CGP-Ascat-pm.html docs/reports_html/standardista-table-sorting.js docs/reports_text/coverage.txt lib/Sanger/CGP/Ascat.pm diff --git a/perl/Makefile.PL b/perl/Makefile.PL index 62b3958..95676d6 100644 --- a/perl/Makefile.PL +++ b/perl/Makefile.PL @@ -34,6 +34,7 @@ WriteMakefile( bin/ascatCnToVCF.pl bin/ascatFailedCnCsv.pl bin/ascatToBigWig.pl + bin/ascatCounts.pl bin/utilities/ascatFaiChunk.pl bin/utilities/ascatSnpPanelFromVcfs.pl bin/utilities/ascatSnpPanelGcCorrections.pl diff --git a/perl/bin/ascat.pl b/perl/bin/ascat.pl index 4356f52..868d9c2 100755 --- a/perl/bin/ascat.pl +++ b/perl/bin/ascat.pl @@ -24,6 +24,7 @@ BEGIN { use Cwd qw(abs_path); use File::Basename; + use File::Path qw(make_path); unshift (@INC,dirname(abs_path($0)).'/../lib'); }; @@ -56,14 +57,19 @@ BEGIN # register any process that can run in parallel here $threads->add_function('allele_count', \&Sanger::CGP::Ascat::Implement::allele_count); + $threads->add_function('deploy_counts', \&Sanger::CGP::Ascat::Implement::deploy_counts); # start processes here (in correct order obviously), add conditions for skipping based on 'process' option - if(!exists $options->{'process'} || $options->{'process'} eq 'allele_count') { + if( ($options->{'counts_input'} == 0) && (!exists $options->{'process'} || $options->{'process'} eq 'allele_count')) { my $jobs = $options->{'lociChrsBySample'}; $jobs = $options->{'limit'} if(exists $options->{'limit'} && defined $options->{'limit'}); $threads->run($jobs, 'allele_count', $options); } - + if ( $options->{'counts_input'} == 1) { + my $ascat_out = File::Spec->catdir(abs_path($options->{'tmp'}),'ascat'); + make_path($ascat_out) unless(-e $ascat_out); + $threads->run(2, 'deploy_counts', $options); + } Sanger::CGP::Ascat::Implement::ascat($options) if(!exists $options->{'process'} || $options->{'process'} eq 'ascat'); if(!exists $options->{'process'} || $options->{'process'} eq 'finalise') { Sanger::CGP::Ascat::Implement::finalise($options); @@ -110,6 +116,8 @@ sub setup { 'f|force' => \$opts{'force'}, 'nc|noclean' => \$opts{'noclean'}, 'nb|nobigwig' => \$opts{'nobigwig'}, + 'tn|t_name=s' => \$opts{'t_name'}, + 'nn|n_name=s' => \$opts{'n_name'} ) or pod2usage(2); pod2usage(-verbose => 1, -exitval => 0) if(defined $opts{'h'}); @@ -145,6 +153,23 @@ sub setup { PCAP::Cli::file_for_reading('tumour', $opts{'tumour'}); PCAP::Cli::file_for_reading('normal', $opts{'normal'}); + + #special case of couts file as input + $opts{'counts_input'} = 0; + if ( ( $opts{'tumour'} =~ /\.count\.gz$/ ) && ( $opts{'normal'} =~ /\.count\.gz$/ ) ) { + warn qq{NOTE: using counts inputs, skipping allelecount step\n}; + if ( ( !defined($opts{'t_name'} )) || ( ! defined($opts{'n_name'})) ){ + pod2usage(-msg => "\nERROR: Must specify normal & tumour names when using count files as input\n", -verbose => 1, -output => \*STDERR); + } + pod2usage(-msg => "\nERROR: Must specify assembly (-ra ) when using count files as input\n", -verbose => 1, -output => \*STDERR) unless ( defined( $opts{'assembly'} ) ); + pod2usage(-msg => "\nERROR: Must specify species (-rs ) when using count files as input\n", -verbose => 1, -output => \*STDERR) unless ( defined( $opts{'species'} ) ); + pod2usage(-msg => "\nERROR: Must specigy platform (-pl ) when using count files as input\n", -verbose => 1, -output => \*STDERR) unless ( defined( $opts{'platform'} ) ); + pod2usage(-msg => "\nERROR: Must specify genderChr when using count files as input\n", -verbose => 1, -output => \*STDERR) unless ( defined( $opts{'genderChr'} ) ); + $opts{'counts_input'} = 1; + } + if ( !( $opts{'tumour'} =~ /\.count\.gz$/ ) != !( $opts{'normal'} =~ /\.count\.gz$/ ) ) { + pod2usage(-msg => "\nERROR: Both tumour and normal need to be count files.\n", -verbose => 1, -output => \*STDERR); + } PCAP::Cli::file_for_reading('snp_gc', $opts{'snp_gc'}); PCAP::Cli::file_for_reading('reference', $opts{'reference'}); PCAP::Cli::out_dir_check('outdir', $opts{'outdir'}); @@ -244,8 +269,8 @@ =head1 SYNOPSIS Required parameters -outdir -o Folder to output result to. - -tumour -t Tumour BAM/CRAM file - -normal -n Normal BAM/CRAM file + -tumour -t Tumour BAM/CRAM/counts file + -normal -n Normal BAM/CRAM/counts file -reference -r Reference fasta -snp_gc -sg Snp GC correction file -protocol -pr Sequencing protocol (e.g. WGS, WXS) @@ -279,6 +304,8 @@ =head1 SYNOPSIS -noclean -nc Finalise results but don't clean up the tmp directory. - Useful when including a manual check and restarting ascat with new pu and pi params. -nobigwig -nb Don't generate BigWig files. + -t_name -tn Tumour name to use when using count files as input + -n_name -nn Noraml name to use when using count files as input Other -help -h Brief help message diff --git a/perl/bin/ascatCnToVCF.pl b/perl/bin/ascatCnToVCF.pl index 2971374..595cc50 100755 --- a/perl/bin/ascatCnToVCF.pl +++ b/perl/bin/ascatCnToVCF.pl @@ -37,7 +37,7 @@ BEGIN use Bio::DB::HTS; use Try::Tiny; use PCAP::Cli; - +use Carp; use Sanger::CGP::Vcf; use Sanger::CGP::Vcf::VCFCNConverter; @@ -50,19 +50,65 @@ BEGIN { my $opts = setup(); - - my $mt_sam = Bio::DB::HTS->new(-bam => $opts->{'sbm'}, -fasta => $opts->{'r'}); - my $wt_sam = Bio::DB::HTS->new(-bam => $opts->{'sbw'}, -fasta => $opts->{'r'}); - - #parse samples and contigs from the bam files. - my $contigs = Sanger::CGP::Vcf::BamUtil->parse_contigs($mt_sam->header->text.$wt_sam->header->text,$opts->{'rs'},$opts->{'ra'}); - my $mt_samples = Sanger::CGP::Vcf::BamUtil->parse_samples($mt_sam->header->text,$opts->{'mss'},$opts->{'msq'},$opts->{'msa'},$opts->{'msc'},$opts->{'msd'},$opts->{'msp'}); - my $wt_samples = Sanger::CGP::Vcf::BamUtil->parse_samples($wt_sam->header->text,$opts->{'wss'},$opts->{'wsq'},$opts->{'wsa'},$opts->{'wsc'},$opts->{'wsd'},$opts->{'wsp'}); - - # close files we're finished with - undef $mt_sam; - undef $wt_sam; - + + my $contigs; + my $mt_samples; + my $wt_samples; + + #If tumour and sample name are provided dont require BAM or ref files + #and must build sample and ref object here + if ( defined $opts->{'tn'} and defined $opts->{'nn'} ) { + $mt_samples->{ $opts->{'tn'} } = new Sanger::CGP::Vcf::Sample( + -name => $opts->{'tn'} , + -study => undef, + -platform => $opts->{'msq'}, + -seq_protocol => undef, + -accession => undef, + -accession_source => undef, + -description => undef + ); + $wt_samples->{ $opts->{'nn'} } = new Sanger::CGP::Vcf::Sample( + -name => $opts->{'nn'} , + -study => undef, + -platform => $opts->{'wsq'}, + -seq_protocol => undef, + -accession => undef, + -accession_source => undef, + -description => undef + ); + + my $fai = $opts->{'r'}.'.fai'; + open(my $FAI, $fai ) or die("\nERROR: Couldn't open $fai index file\n"); + while(<$FAI>){ + my ($name,$length) = split /\t/; + my $contig = new Sanger::CGP::Vcf::Contig( + -name => $name, + -length => $length, + -assembly => $opts->{'ra'}, + -species => $opts->{'rs'} + ); + if(exists $contigs->{$name}){ + croak "ERROR: Trying to merge contigs with conflicting data:\n".Dumper($contigs->{$name})."\n".Dumper($contig) + unless $contig->compare($contigs->{$name}); + } else { + $contigs->{$name} = $contig; + } + } + } + #BAM input + else { + my $mt_sam = Bio::DB::HTS->new(-bam => $opts->{'sbm'}, -fasta => $opts->{'r'}); + my $wt_sam = Bio::DB::HTS->new(-bam => $opts->{'sbw'}, -fasta => $opts->{'r'}); + + #parse samples and contigs from the bam files. + $contigs = Sanger::CGP::Vcf::BamUtil->parse_contigs($mt_sam->header->text.$wt_sam->header->text,$opts->{'rs'},$opts->{'ra'}); + $mt_samples = Sanger::CGP::Vcf::BamUtil->parse_samples($mt_sam->header->text,$opts->{'mss'},$opts->{'msq'},$opts->{'msa'},$opts->{'msc'},$opts->{'msd'},$opts->{'msp'}); + $wt_samples = Sanger::CGP::Vcf::BamUtil->parse_samples($wt_sam->header->text,$opts->{'wss'},$opts->{'wsq'},$opts->{'wsa'},$opts->{'wsc'},$opts->{'wsd'},$opts->{'wsp'}); + + # close files we're finished with + undef $mt_sam; + undef $wt_sam; + } die "No samples found in normal bam file." if(scalar values %$wt_samples == 0); die "Multiple samples found in normal bam file." if(scalar values %$wt_samples > 1); die "No samples found in mutant bam file." if(scalar values %$mt_samples == 0); @@ -164,6 +210,8 @@ sub setup{ 'rs|reference-species=s' => \$opts{'rs'}, 'ra|reference-assembly=s' => \$opts{'ra'}, 'r|reference=s' => \$opts{'r'}, + 'tn|tumour_name=s' => \$opts{'tn'}, + 'nn|normal_name=s' => \$opts{'nn'}, '<>' => sub{push(@random_args,shift(@_));} ) or pod2usage(2); @@ -175,18 +223,27 @@ sub setup{ pod2usage(-verbose => 1) if(defined $opts{'h'}); pod2usage(-verbose => 2) if(defined $opts{'m'}); - if($opts{'i'}){ # can come from STDIN if not defined PCAP::Cli::file_for_reading('i', $opts{'i'}); } - PCAP::Cli::file_for_reading('sbm', $opts{'sbm'}); - PCAP::Cli::file_for_reading('sbw', $opts{'sbw'}); - PCAP::Cli::file_for_reading('r', $opts{'r'}); pod2usage(-message => "\nERROR: msq|sample-sequencing-protocol-mut must be defined.\n", -verbose => 1, -output => \*STDERR) if(exists $opts{'msq'} && ! defined $opts{'msq'}); pod2usage(-message => "\nERROR: wsq|sample-sequencing-protocol-norm must be defined.\n", -verbose => 1, -output => \*STDERR) if(exists $opts{'wsq'} && ! defined $opts{'wsq'}); + PCAP::Cli::file_for_reading('r', $opts{'r'}); + + if ( defined $opts{'tn'} or defined $opts{'nn'} ){ + pod2usage(-message => "\nERROR: When using sample name arguments both tumour and normal must be defined\n", -verbose => 1, -output => \*STDERR) if( !( defined $opts{'tn'} && defined $opts{'nn'}) ); + pod2usage(-message => "\nERROR: When using sample name arguments ref. assembly must be specified (-ra)\n", -verbose => 1, -output => \*STDERR) if( !( defined $opts{'ra'} && defined $opts{'ra'}) ); + pod2usage(-message => "\nERROR: When using sample name arguments ref. species must be specified (-rs)\n", -verbose => 1, -output => \*STDERR) if( !( defined $opts{'rs'} && defined $opts{'rs'}) ); + pod2usage(-message => "\nERROR: When using sample name arguments sequencing platform must be specified (-wsp & -msp)\n", -verbose => 1, -output => \*STDERR) if( !( defined $opts{'wsp'} && defined $opts{'wsp'}) and !( defined $opts{'msp'} && defined $opts{'msp'}) ); + return \%opts; + } + + PCAP::Cli::file_for_reading('sbm', $opts{'sbm'}); + PCAP::Cli::file_for_reading('sbw', $opts{'sbw'}); + return \%opts; } @@ -222,7 +279,11 @@ =head1 SYNOPSIS -sample-accession-source-norm -wsc Normal sample accession source. -seq-platform-norm -wsp Normal sequencing platform [BAM HEADER]. + Other: + -tumour_name -tn Tumour sample name. For processing count file results + -normal_name -nn Normal sample name. For processing count file results + -help -h Brief help message. -man -m Full documentation. -version -v Version information. diff --git a/perl/bin/ascatCounts.pl b/perl/bin/ascatCounts.pl new file mode 100755 index 0000000..e1a30c0 --- /dev/null +++ b/perl/bin/ascatCounts.pl @@ -0,0 +1,185 @@ +#!/usr/bin/perl + +##########LICENCE########## +# Copyright (c) 2014-2019 Genome Research Ltd. +# +# Author: CASM/Cancer IT +# +# This file is part of AscatNGS. +# +# AscatNGS is free software: you can redistribute it and/or modify it under +# the terms of the GNU Affero General Public License as published by the Free +# Software Foundation; either version 3 of the License, or (at your option) any +# later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +# details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +##########LICENCE########## + +BEGIN { + use Cwd qw(abs_path); + use File::Basename; + unshift (@INC,dirname(abs_path($0)).'/../lib'); +}; + +use strict; +use warnings FATAL => 'all'; +use autodie qw(:all); + +use File::Path qw(remove_tree make_path); +use Getopt::Long; +use File::Spec; +use Pod::Usage qw(pod2usage); +use List::Util qw(first); +use Const::Fast qw(const); +use File::Copy; +use File::Which qw(which); +use FindBin qw($Bin); + +use Sanger::CGP::Ascat::Implement; + +use PCAP::Cli; + +{ + my $options = setup(); + $options->{'tumour'} = $options->{'bam'}; #map input file to tumour + $options->{'tumour_name'} = (PCAP::Bam::sample_name($options->{'bam'}))[0]; + my $threads = PCAP::Threaded->new($options->{'threads'}); + + # register any process that can run in parallel here + $threads->add_function('allele_count', \&Sanger::CGP::Ascat::Implement::allele_count); + + # start processes here (in correct order obviously), add conditions for skipping based on 'process' option + my $jobs = $options->{'lociChrsBySample'}; + $jobs = $options->{'limit'} if(exists $options->{'limit'} && defined $options->{'limit'}); + $threads->run($jobs, 'allele_count', $options); + + Sanger::CGP::Ascat::Implement::merge_counts_and_index($options); + cleanup($options) unless($options->{'noclean'} == 1); +} + +sub cleanup { + my $options = shift; + my $tmpdir = $options->{'tmp'}; + move(File::Spec->catdir($tmpdir, 'ascatCounts'), File::Spec->catdir($options->{'outdir'}, 'ascatCounts')) || die $!; + move(File::Spec->catdir($tmpdir, 'logs'), File::Spec->catdir($options->{'outdir'}, 'logs')) || die $!; + remove_tree $tmpdir if(-e $tmpdir); + return 0; +} + +sub _which { + my $prog = shift; + my $l_bin = $Bin; + my $path = File::Spec->catfile($l_bin, $prog); + $path = which($prog) unless(-e $path); + die "Failed to find $prog in PATH or local bin folder" unless(defined $path); + return $path; +} + +sub setup { + my %opts; + pod2usage(-msg => "\nERROR: Option must be defined.\n", -verbose => 1, -output => \*STDERR) if(scalar @ARGV == 0); + $opts{'cmd'} = join " ", $0, @ARGV; + GetOptions( 'h|help' => \$opts{'h'}, + 'm|man' => \$opts{'m'}, + 'v|version' => \$opts{'v'}, + 'o|outdir=s' => \$opts{'outdir'}, + 'b|bam=s' => \$opts{'bam'}, + 'c|cpus=i' => \$opts{'threads'}, + 'q|minbasequal=i' => \$opts{'minbasequal'}, + 'sg|snp_gc=s' => \$opts{'snp_gc'}, + 'r|reference=s' => \$opts{'reference'}, + 'nc|noclean' => \$opts{'noclean'}, + ) or pod2usage(2); + + pod2usage(-verbose => 1, -exitval => 0) if(defined $opts{'h'}); + pod2usage(-verbose => 2, -exitval => 0) if(defined $opts{'m'}); + + if($opts{'v'}){ + print Sanger::CGP::Ascat->VERSION."\n"; + exit; + } + + warn "Executing: $opts{cmd}\n"; + + if(!defined($opts{'noclean'})){ + $opts{'noclean'} = 0; + } + + # then check for no args: + my $defined; + for(keys %opts) { $defined++ if(defined $opts{$_}); } + pod2usage(-msg => "\nERROR: Options must be defined.\n", -verbose => 1, -output => \*STDERR) unless($defined); + + for my $item (qw(bam snp_gc reference outdir)) { + pod2usage(-msg => "\nERROR: Option '-$item' must be defined.\n", -verbose => 1, -output => \*STDERR) unless(defined $opts{$item}); + } + + for my $item(qw(bam reference outdir)) { + $opts{$item} = File::Spec->rel2abs( $opts{$item} ) if(defined $opts{$item}); + } + + PCAP::Cli::file_for_reading('bam', $opts{'bam'}); + PCAP::Cli::file_for_reading('snp_gc', $opts{'snp_gc'}); + PCAP::Cli::file_for_reading('reference', $opts{'reference'}); + PCAP::Cli::out_dir_check('outdir', $opts{'outdir'}); + + my $final_logs = File::Spec->catdir($opts{'outdir'}, 'logs'); + if(-e $final_logs) { + warn "NOTE: Presence of '$final_logs' directory suggests successful complete analysis, please delete to rerun\n"; + exit 0; + } + + $opts{'minbasequal'} = 20 unless(defined $opts{'minbasequal'}); + + $opts{'lociChrsBySample'} = scalar Sanger::CGP::Ascat::Implement::snpLociChrs(\%opts); + + $opts{'threads'} = 1 unless(defined $opts{'threads'}); + + my $tmpdir = File::Spec->catdir($opts{'outdir'}, 'tmpAscat'); + make_path($tmpdir) unless(-d $tmpdir); + my $progress = File::Spec->catdir($tmpdir, 'progress'); + make_path($progress) unless(-d $progress); + my $logs = File::Spec->catdir($tmpdir, 'logs'); + make_path($logs) unless(-d $logs); + + $opts{'tmp'} = $tmpdir; + + return \%opts; +} + + +__END__ + +=head1 ascat.pl + +Reference implementation of Cancer Genome Project Ascat +copy-number analysis pipeline. + +=head1 SYNOPSIS + +ascatCounts.pl [options] + + Please define as many of the parameters as possible + + Required parameters + + -outdir -o Folder to output result to. + -bam -b Input BAM/CRAM file + -reference -r Reference fasta + -snp_gc -sg Snp GC correction file + + Optional parameters + -minbasequal -q Minimum base quality required before allele is used. [20] + -cpus -c Number of cores to use. [1] + -noclean -nc Finalise results but don't clean up the tmp directory. + + Other + -help -h Brief help message + -man -m Full documentation. + -version -v Ascat version number diff --git a/perl/docs.tar.gz b/perl/docs.tar.gz index 97cbb3a..d1fb336 100644 Binary files a/perl/docs.tar.gz and b/perl/docs.tar.gz differ diff --git a/perl/lib/Sanger/CGP/Ascat.pm b/perl/lib/Sanger/CGP/Ascat.pm index d763bca..bfb5980 100644 --- a/perl/lib/Sanger/CGP/Ascat.pm +++ b/perl/lib/Sanger/CGP/Ascat.pm @@ -26,7 +26,7 @@ use strict; use Const::Fast qw(const); use base 'Exporter'; -our $VERSION = '4.3.4'; +our $VERSION = '4.4.0'; our @EXPORT = qw($VERSION); const my $LICENSE => diff --git a/perl/lib/Sanger/CGP/Ascat/Implement.pm b/perl/lib/Sanger/CGP/Ascat/Implement.pm index 97d88ef..d35ed6c 100644 --- a/perl/lib/Sanger/CGP/Ascat/Implement.pm +++ b/perl/lib/Sanger/CGP/Ascat/Implement.pm @@ -106,7 +106,7 @@ sub allele_count { make_path($loci_files) unless(-e $loci_files); my $chr = $seqs[$seq_idx-1]; - my $sname = sanitised_sample_from_bam($options->{$samp_type}); + my $sname = $options->{$samp_type.'_name'}; my $alleleCountOut = File::Spec->catfile($ac_out,sprintf '%s.%s.allct', $sname, $chr); # first we need a loci file, generate from the gc file: @@ -133,8 +133,8 @@ sub ascat { $tmp = abs_path($tmp); return 1 if PCAP::Threaded::success_exists(File::Spec->catdir($tmp, 'progress'), 0); - my $tum_name = sanitised_sample_from_bam($options->{'tumour'}); - my $norm_name = sanitised_sample_from_bam($options->{'normal'}); + my $tum_name = $options->{'tumour_name'}; + my $norm_name = $options->{'normal_name'}; my $ascat_out = File::Spec->catdir($tmp, 'ascat'); make_path($ascat_out) unless(-e $ascat_out); @@ -147,14 +147,15 @@ sub ascat { my $tumcount = File::Spec->catfile($ascat_out,$tumcountfile); my $normcount = File::Spec->catfile($ascat_out,$normcountfile); - unless(PCAP::Threaded::success_exists(File::Spec->catdir($tmp, 'progress'), 'merge_counts_mt', 0)) { - merge_counts($options, $tmp, $tum_name, $tumcount); - PCAP::Threaded::touch_success(File::Spec->catdir($tmp, 'progress'), 'merge_counts_mt', 0); - } - - unless(PCAP::Threaded::success_exists(File::Spec->catdir($tmp, 'progress'), 'merge_counts_wt', 0)) { - merge_counts($options, $tmp, $norm_name, $normcount); - PCAP::Threaded::touch_success(File::Spec->catdir($tmp, 'progress'), 'merge_counts_wt', 0); + if ( $options->{'counts_input'} == 0 ) { + unless(PCAP::Threaded::success_exists(File::Spec->catdir($tmp, 'progress'), 'merge_counts_mt', 0)) { + merge_counts($options, $tmp, $tum_name, $tumcount); + PCAP::Threaded::touch_success(File::Spec->catdir($tmp, 'progress'), 'merge_counts_mt', 0); + } + unless(PCAP::Threaded::success_exists(File::Spec->catdir($tmp, 'progress'), 'merge_counts_wt', 0)) { + merge_counts($options, $tmp, $norm_name, $normcount); + PCAP::Threaded::touch_success(File::Spec->catdir($tmp, 'progress'), 'merge_counts_wt', 0); + } } my $clean_snp_gc = File::Spec->catfile($tmp,'SnpGcCorrections.tsv'); @@ -210,7 +211,7 @@ sub finalise { return 1 if PCAP::Threaded::success_exists(File::Spec->catdir($tmp, 'progress'), 0); - my $tum_name = sanitised_sample_from_bam($options->{'tumour'}); + my $tum_name = $options->{'tumour_name'}; my $ascat_out = File::Spec->catdir($tmp, 'ascat'); my $cave_cn; my $force_complete = 0; @@ -294,8 +295,14 @@ sub finalise { $command .= " -o $new_vcf"; $command .= " -r $options->{reference}"; $command .= " -i $cave_cn"; - $command .= " -sbm $options->{tumour}"; - $command .= " -sbw $options->{normal}"; + if ( $options->{'counts_input'} == 0) { + $command .= " -sbm $options->{tumour}"; + $command .= " -sbw $options->{normal}"; + } + else { + $command .= " -tn $options->{tumour_name}"; + $command .= " -nn $options->{normal_name}"; + } $command .= " -ra $options->{assembly}" if(defined $options->{'assembly'}); $command .= " -rs $options->{species}" if(defined $options->{'species'}); $command .= " -msq $options->{protocol} -wsq $options->{protocol}" if(defined $options->{'protocol'}); @@ -371,6 +378,36 @@ sub merge_counts { return 1; } +sub merge_counts_and_index { + my $options = shift; + + my $tmp = $options->{'tmp'}; + $tmp = abs_path($tmp); + return 1 if PCAP::Threaded::success_exists(File::Spec->catdir($tmp, 'progress'), 0); + + my $tum_name = $options->{'tumour_name'}; + + my $ascat_out = File::Spec->catdir($tmp, 'ascatCounts'); + make_path($ascat_out) unless(-e $ascat_out); + + my $tumcountfile = $tum_name . '.count'; + + my $tumcount = File::Spec->catfile($ascat_out,$tumcountfile); + + unless(PCAP::Threaded::success_exists(File::Spec->catdir($tmp, 'progress'), 'merge_counts_mt', 0)) { + merge_counts($options, $tmp, $tum_name, $tumcount); + PCAP::Threaded::touch_success(File::Spec->catdir($tmp, 'progress'), 'merge_counts_mt', 0); + } + my @commands = (); + my $tumcount_new = $tumcount.'.gz'; + my $sort_gz = sprintf q{(grep '^#' %s ; grep -v '^#' %s | sort -k 1,1 -k 2,2n) | %s -c > %s}, $tumcount, $tumcount, _which('bgzip'), $tumcount_new; + push @commands, $sort_gz; + my $tabix = sprintf('%s -s1 -b2 -e2 %s',_which('tabix'),$tumcount_new); + push @commands, $tabix; + PCAP::Threaded::external_process_handler(File::Spec->catdir($tmp, 'logs'), \@commands, 0); + unlink $tumcount; +} + sub get_allele_count_file_path { my ($tmp,$sample_name) = @_; return File::Spec->catfile(File::Spec->catdir($tmp, $sample_name),'sample.allele_count'); @@ -384,8 +421,14 @@ sub sanitised_sample_from_bam { sub prepare { my $options = shift; - $options->{'tumour_name'} = (PCAP::Bam::sample_name($options->{'tumour'}))[0]; - $options->{'normal_name'} = (PCAP::Bam::sample_name($options->{'normal'}))[0]; + if ( $options->{'counts_input'} == 0 ) { + $options->{'tumour_name'} = sanitised_sample_from_bam($options->{'tumour'}); + $options->{'normal_name'} = sanitised_sample_from_bam($options->{'normal'}); + } + else { + $options->{'tumour_name'} = $options->{'t_name'}; + $options->{'normal_name'} = $options->{'n_name'}; + } return 1; } @@ -400,6 +443,9 @@ sub _which { sub determine_gender { my $options = shift; + + die "Error: gender cannot be determined from counts file, it must be specified as a parameter\n" if ( $options->{'counts_input'} == 1); + my $gender_loci; if(defined $options->{'locus'}) { $gender_loci = $options->{'locus'}; @@ -469,4 +515,22 @@ sub limited_indices { return @indicies; } +sub deploy_counts { + my ($index_in, $options) = @_; + my $tmp = abs_path($options->{'tmp'}); + my $ascat_out = File::Spec->catdir($tmp,'ascat'); + my @commands; + my $tum_name = $options->{'tumour_name'}; + my $norm_name = $options->{'normal_name'}; + my $tumcount = File::Spec->catfile($ascat_out,$tum_name . '.count'); + my $normcount = File::Spec->catfile($ascat_out,$norm_name . '.count'); + if ($index_in == 1 ){ + push @commands, sprintf 'zcat %s > %s',$options->{'tumour'}, $tumcount; + } + if ($index_in == 2 ){ + push @commands, sprintf 'zcat %s > %s',$options->{'normal'}, $normcount; + } + PCAP::Threaded::external_process_handler(File::Spec->catdir($tmp, 'logs'), \@commands, 0); +} + 1;