Skip to content

Commit

Permalink
Merge branch 'release/4.4.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
ra11 committed Aug 19, 2020
2 parents 7943f3d + a5b50e0 commit 0e99853
Show file tree
Hide file tree
Showing 9 changed files with 389 additions and 44 deletions.
5 changes: 5 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# Changes

## 4.4.0

* Added ascatCounts to produce counts files
* Modified ascat wrapper to handle count files as input

## 4.3.4

* Eliminated redundant logic from setup script
Expand Down
12 changes: 7 additions & 5 deletions perl/MANIFEST
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
bin/ascat.pl
bin/ascatCnToVCF.pl
bin/ascatCounts.pl
bin/ascatFailedCnCsv.pl
bin/ascatToBigWig.pl
bin/utilities/ascatFaiChunk.pl
Expand All @@ -21,23 +22,24 @@ docs/pod_html/_whtprpk.css
docs/pod_html/_whtpurk.css
docs/pod_html/ascat.html
docs/pod_html/ascatCnToVCF.html
docs/pod_html/ascatCounts.html
docs/pod_html/ascatFailedCnCsv.html
docs/pod_html/ascatToBigWig.html
docs/pod_html/index.html
docs/pod_html/Sanger/CGP/Ascat.html
docs/pod_html/utilities/ascatSnpPanelGenerator.html
docs/reports_html/blib-lib-Sanger-CGP-Ascat-Implement-pm--branch.html
docs/reports_html/blib-lib-Sanger-CGP-Ascat-Implement-pm--subroutine.html
docs/reports_html/blib-lib-Sanger-CGP-Ascat-Implement-pm.html
docs/reports_html/blib-lib-Sanger-CGP-Ascat-pm--subroutine.html
docs/reports_html/blib-lib-Sanger-CGP-Ascat-pm.html
docs/reports_html/common.js
docs/reports_html/cover.14
docs/reports_html/cover.css
docs/reports_html/coverage.html
docs/reports_html/css.js
docs/reports_html/digests
docs/reports_html/index.html
docs/reports_html/lib-Sanger-CGP-Ascat-Implement-pm--branch.html
docs/reports_html/lib-Sanger-CGP-Ascat-Implement-pm--subroutine.html
docs/reports_html/lib-Sanger-CGP-Ascat-Implement-pm.html
docs/reports_html/lib-Sanger-CGP-Ascat-pm--subroutine.html
docs/reports_html/lib-Sanger-CGP-Ascat-pm.html
docs/reports_html/standardista-table-sorting.js
docs/reports_text/coverage.txt
lib/Sanger/CGP/Ascat.pm
Expand Down
1 change: 1 addition & 0 deletions perl/Makefile.PL
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ WriteMakefile(
bin/ascatCnToVCF.pl
bin/ascatFailedCnCsv.pl
bin/ascatToBigWig.pl
bin/ascatCounts.pl
bin/utilities/ascatFaiChunk.pl
bin/utilities/ascatSnpPanelFromVcfs.pl
bin/utilities/ascatSnpPanelGcCorrections.pl
Expand Down
35 changes: 31 additions & 4 deletions perl/bin/ascat.pl
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
# Compile-time setup: load the path helpers and put the sibling ../lib
# directory (relative to this script's absolute location) at the front of @INC.
BEGIN {
use Cwd qw(abs_path);
use File::Basename;
use File::Path qw(make_path);
# dirname(abs_path($0)) is the directory that holds this script
unshift (@INC,dirname(abs_path($0)).'/../lib');
};

Expand Down Expand Up @@ -56,14 +57,19 @@ BEGIN

# register any process that can run in parallel here
$threads->add_function('allele_count', \&Sanger::CGP::Ascat::Implement::allele_count);
$threads->add_function('deploy_counts', \&Sanger::CGP::Ascat::Implement::deploy_counts);

# start processes here (in correct order obviously), add conditions for skipping based on 'process' option
if(!exists $options->{'process'} || $options->{'process'} eq 'allele_count') {
if( ($options->{'counts_input'} == 0) && (!exists $options->{'process'} || $options->{'process'} eq 'allele_count')) {
my $jobs = $options->{'lociChrsBySample'};
$jobs = $options->{'limit'} if(exists $options->{'limit'} && defined $options->{'limit'});
$threads->run($jobs, 'allele_count', $options);
}

if ( $options->{'counts_input'} == 1) {
my $ascat_out = File::Spec->catdir(abs_path($options->{'tmp'}),'ascat');
make_path($ascat_out) unless(-e $ascat_out);
$threads->run(2, 'deploy_counts', $options);
}
Sanger::CGP::Ascat::Implement::ascat($options) if(!exists $options->{'process'} || $options->{'process'} eq 'ascat');
if(!exists $options->{'process'} || $options->{'process'} eq 'finalise') {
Sanger::CGP::Ascat::Implement::finalise($options);
Expand Down Expand Up @@ -110,6 +116,8 @@ sub setup {
'f|force' => \$opts{'force'},
'nc|noclean' => \$opts{'noclean'},
'nb|nobigwig' => \$opts{'nobigwig'},
'tn|t_name=s' => \$opts{'t_name'},
'nn|n_name=s' => \$opts{'n_name'}
) or pod2usage(2);

pod2usage(-verbose => 1, -exitval => 0) if(defined $opts{'h'});
Expand Down Expand Up @@ -145,6 +153,23 @@ sub setup {

PCAP::Cli::file_for_reading('tumour', $opts{'tumour'});
PCAP::Cli::file_for_reading('normal', $opts{'normal'});

#special case of counts files as input
$opts{'counts_input'} = 0;
if ( ( $opts{'tumour'} =~ /\.count\.gz$/ ) && ( $opts{'normal'} =~ /\.count\.gz$/ ) ) {
warn qq{NOTE: using counts inputs, skipping allelecount step\n};
if ( ( !defined($opts{'t_name'} )) || ( ! defined($opts{'n_name'})) ){
pod2usage(-msg => "\nERROR: Must specify normal & tumour names when using count files as input\n", -verbose => 1, -output => \*STDERR);
}
pod2usage(-msg => "\nERROR: Must specify assembly (-ra ) when using count files as input\n", -verbose => 1, -output => \*STDERR) unless ( defined( $opts{'assembly'} ) );
pod2usage(-msg => "\nERROR: Must specify species (-rs ) when using count files as input\n", -verbose => 1, -output => \*STDERR) unless ( defined( $opts{'species'} ) );
pod2usage(-msg => "\nERROR: Must specigy platform (-pl ) when using count files as input\n", -verbose => 1, -output => \*STDERR) unless ( defined( $opts{'platform'} ) );
pod2usage(-msg => "\nERROR: Must specify genderChr when using count files as input\n", -verbose => 1, -output => \*STDERR) unless ( defined( $opts{'genderChr'} ) );
$opts{'counts_input'} = 1;
}
if ( !( $opts{'tumour'} =~ /\.count\.gz$/ ) != !( $opts{'normal'} =~ /\.count\.gz$/ ) ) {
pod2usage(-msg => "\nERROR: Both tumour and normal need to be count files.\n", -verbose => 1, -output => \*STDERR);
}
PCAP::Cli::file_for_reading('snp_gc', $opts{'snp_gc'});
PCAP::Cli::file_for_reading('reference', $opts{'reference'});
PCAP::Cli::out_dir_check('outdir', $opts{'outdir'});
Expand Down Expand Up @@ -244,8 +269,8 @@ =head1 SYNOPSIS
Required parameters
-outdir -o Folder to output result to.
-tumour -t Tumour BAM/CRAM file
-normal -n Normal BAM/CRAM file
-tumour -t Tumour BAM/CRAM/counts file
-normal -n Normal BAM/CRAM/counts file
-reference -r Reference fasta
-snp_gc -sg Snp GC correction file
-protocol -pr Sequencing protocol (e.g. WGS, WXS)
Expand Down Expand Up @@ -279,6 +304,8 @@ =head1 SYNOPSIS
-noclean -nc Finalise results but don't clean up the tmp directory.
- Useful when including a manual check and restarting ascat with new pu and pi params.
-nobigwig -nb Don't generate BigWig files.
-t_name -tn Tumour name to use when using count files as input
-n_name -nn Normal name to use when using count files as input
Other
-help -h Brief help message
Expand Down
97 changes: 79 additions & 18 deletions perl/bin/ascatCnToVCF.pl
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ BEGIN
use Bio::DB::HTS;
use Try::Tiny;
use PCAP::Cli;

use Carp;

use Sanger::CGP::Vcf;
use Sanger::CGP::Vcf::VCFCNConverter;
Expand All @@ -50,19 +50,65 @@ BEGIN

{
my $opts = setup();

my $mt_sam = Bio::DB::HTS->new(-bam => $opts->{'sbm'}, -fasta => $opts->{'r'});
my $wt_sam = Bio::DB::HTS->new(-bam => $opts->{'sbw'}, -fasta => $opts->{'r'});

#parse samples and contigs from the bam files.
my $contigs = Sanger::CGP::Vcf::BamUtil->parse_contigs($mt_sam->header->text.$wt_sam->header->text,$opts->{'rs'},$opts->{'ra'});
my $mt_samples = Sanger::CGP::Vcf::BamUtil->parse_samples($mt_sam->header->text,$opts->{'mss'},$opts->{'msq'},$opts->{'msa'},$opts->{'msc'},$opts->{'msd'},$opts->{'msp'});
my $wt_samples = Sanger::CGP::Vcf::BamUtil->parse_samples($wt_sam->header->text,$opts->{'wss'},$opts->{'wsq'},$opts->{'wsa'},$opts->{'wsc'},$opts->{'wsd'},$opts->{'wsp'});

# close files we're finished with
undef $mt_sam;
undef $wt_sam;


my $contigs;
my $mt_samples;
my $wt_samples;

#If tumour and normal names are provided, don't require BAM files;
#the sample and contig objects must be built here (contigs come from the reference .fai index)
if ( defined $opts->{'tn'} and defined $opts->{'nn'} ) {
$mt_samples->{ $opts->{'tn'} } = new Sanger::CGP::Vcf::Sample(
-name => $opts->{'tn'} ,
-study => undef,
-platform => $opts->{'msq'},
-seq_protocol => undef,
-accession => undef,
-accession_source => undef,
-description => undef
);
$wt_samples->{ $opts->{'nn'} } = new Sanger::CGP::Vcf::Sample(
-name => $opts->{'nn'} ,
-study => undef,
-platform => $opts->{'wsq'},
-seq_protocol => undef,
-accession => undef,
-accession_source => undef,
-description => undef
);

my $fai = $opts->{'r'}.'.fai';
open(my $FAI, $fai ) or die("\nERROR: Couldn't open $fai index file\n");
while(<$FAI>){
my ($name,$length) = split /\t/;
my $contig = new Sanger::CGP::Vcf::Contig(
-name => $name,
-length => $length,
-assembly => $opts->{'ra'},
-species => $opts->{'rs'}
);
if(exists $contigs->{$name}){
croak "ERROR: Trying to merge contigs with conflicting data:\n".Dumper($contigs->{$name})."\n".Dumper($contig)
unless $contig->compare($contigs->{$name});
} else {
$contigs->{$name} = $contig;
}
}
}
#BAM input
else {
my $mt_sam = Bio::DB::HTS->new(-bam => $opts->{'sbm'}, -fasta => $opts->{'r'});
my $wt_sam = Bio::DB::HTS->new(-bam => $opts->{'sbw'}, -fasta => $opts->{'r'});

#parse samples and contigs from the bam files.
$contigs = Sanger::CGP::Vcf::BamUtil->parse_contigs($mt_sam->header->text.$wt_sam->header->text,$opts->{'rs'},$opts->{'ra'});
$mt_samples = Sanger::CGP::Vcf::BamUtil->parse_samples($mt_sam->header->text,$opts->{'mss'},$opts->{'msq'},$opts->{'msa'},$opts->{'msc'},$opts->{'msd'},$opts->{'msp'});
$wt_samples = Sanger::CGP::Vcf::BamUtil->parse_samples($wt_sam->header->text,$opts->{'wss'},$opts->{'wsq'},$opts->{'wsa'},$opts->{'wsc'},$opts->{'wsd'},$opts->{'wsp'});

# close files we're finished with
undef $mt_sam;
undef $wt_sam;
}
die "No samples found in normal bam file." if(scalar values %$wt_samples == 0);
die "Multiple samples found in normal bam file." if(scalar values %$wt_samples > 1);
die "No samples found in mutant bam file." if(scalar values %$mt_samples == 0);
Expand Down Expand Up @@ -164,6 +210,8 @@ sub setup{
'rs|reference-species=s' => \$opts{'rs'},
'ra|reference-assembly=s' => \$opts{'ra'},
'r|reference=s' => \$opts{'r'},
'tn|tumour_name=s' => \$opts{'tn'},
'nn|normal_name=s' => \$opts{'nn'},
'<>' => sub{push(@random_args,shift(@_));}
) or pod2usage(2);

Expand All @@ -175,18 +223,27 @@ sub setup{
pod2usage(-verbose => 1) if(defined $opts{'h'});
pod2usage(-verbose => 2) if(defined $opts{'m'});


if($opts{'i'}){
# can come from STDIN if not defined
PCAP::Cli::file_for_reading('i', $opts{'i'});
}
PCAP::Cli::file_for_reading('sbm', $opts{'sbm'});
PCAP::Cli::file_for_reading('sbw', $opts{'sbw'});
PCAP::Cli::file_for_reading('r', $opts{'r'});

pod2usage(-message => "\nERROR: msq|sample-sequencing-protocol-mut must be defined.\n", -verbose => 1, -output => \*STDERR) if(exists $opts{'msq'} && ! defined $opts{'msq'});
pod2usage(-message => "\nERROR: wsq|sample-sequencing-protocol-norm must be defined.\n", -verbose => 1, -output => \*STDERR) if(exists $opts{'wsq'} && ! defined $opts{'wsq'});

PCAP::Cli::file_for_reading('r', $opts{'r'});

# Sample-name mode: entered when either -tn or -nn was supplied. Validates the
# companion options and returns early, so the sbm/sbw file checks that follow
# this block are not run in this mode.
if ( defined $opts{'tn'} or defined $opts{'nn'} ){
# Both names are needed; supplying only one of them is a usage error.
pod2usage(-message => "\nERROR: When using sample name arguments both tumour and normal must be defined\n", -verbose => 1, -output => \*STDERR) if( !( defined $opts{'tn'} && defined $opts{'nn'}) );
# NOTE(review): each of the next two conditions tests the same key twice; the second 'defined' is redundant.
pod2usage(-message => "\nERROR: When using sample name arguments ref. assembly must be specified (-ra)\n", -verbose => 1, -output => \*STDERR) if( !( defined $opts{'ra'} && defined $opts{'ra'}) );
pod2usage(-message => "\nERROR: When using sample name arguments ref. species must be specified (-rs)\n", -verbose => 1, -output => \*STDERR) if( !( defined $opts{'rs'} && defined $opts{'rs'}) );
# NOTE(review): with 'and' this only fires when BOTH -wsp and -msp are undefined, while the message asks for both - confirm whether 'or' was intended.
pod2usage(-message => "\nERROR: When using sample name arguments sequencing platform must be specified (-wsp & -msp)\n", -verbose => 1, -output => \*STDERR) if( !( defined $opts{'wsp'} && defined $opts{'wsp'}) and !( defined $opts{'msp'} && defined $opts{'msp'}) );
return \%opts;
}

PCAP::Cli::file_for_reading('sbm', $opts{'sbm'});
PCAP::Cli::file_for_reading('sbw', $opts{'sbw'});

return \%opts;
}

Expand Down Expand Up @@ -222,7 +279,11 @@ =head1 SYNOPSIS
-sample-accession-source-norm -wsc Normal sample accession source.
-seq-platform-norm -wsp Normal sequencing platform [BAM HEADER].
Other:
-tumour_name -tn Tumour sample name. For processing count file results
-normal_name -nn Normal sample name. For processing count file results
-help -h Brief help message.
-man -m Full documentation.
-version -v Version information.
Expand Down
Loading

0 comments on commit 0e99853

Please sign in to comment.