-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathcheck_length.pl
93 lines (81 loc) · 2.14 KB
/
check_length.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#!/usr/bin/perl
#
# This file is a script for Polymorphic Edge Detection.
#
# Multithreaded version of all in one.
#
# Copyright (C) 2019 National Agriculture and Food Research Organization. All Rights Reserved.
#
# License: refer to https://github.com/akiomiyao/ped
#
# Author: MIYAO Akio <[email protected]>
#
$usage = '
check_length.pl - output read counts for each sequence length.
e.g. perl check_length.pl SRR11542243
Fastq files should be saved in ./target/read directory.
Output data format: read lenth<TAB>number of reads
If fastq data have multiple length reads, clipping sequence is required.
For analisys sequence for RT-PCR product, check length using this script.
For example, highest read count of fastq files is 101,
perl ped.pl target=SRR11542243,ref=COVID19,clipping=100 (or 99).
Author: MIYAO Akio <[email protected]>
';
if ($ARGV[0] =~ /target|accession/){
my @arg = split(',', $ARGV[0]);
foreach (sort @arg){
next if $_ eq "";
my ($name, $val) = split('=', $_);
$$name = $val;
}
if ($target eq "" and $accession ne ""){
$target = $accession;
}
}elsif ($ARGV[0] ne ""){
$target = $ARGV[0];
}
$wd = "." if $wd eq "";
die $usage if $target eq "";
opendir(DIR, "$wd/$target/read/");
foreach (readdir(DIR)){
next if /^\./;
if (/gz$/){
open(IN, "zcat $wd/$target/read/$_ |");
}elsif(/bz2$/){
open(IN, "bzcat $wd/$target/read/$_ |");
}elsif(/xz$/){
open(IN, "xzcat $wd/$target/read/$_ |");
}else{
open(IN, "$wd/$target/read/$_");
}
while(<IN>){
$line ++;
if ($line % 4 == 2 and ! /N/){
chomp;
$len = length($_);
$count{$len}++;
$total ++;
}
}
close(IN);
}
close(DIR);
print "Length\tCount\tPercent\n";
foreach (reverse sort bynumber keys %count){
$sum += $count{$_};
$percent = int($sum * 100 / $total);
print "$_\t$count{$_}\t$percent%\n";
if ($percent >= 95 and $flag == 0){
print STDERR "Please choose optimal clipping length. Around 90% is enough.
Press c and Enter to continue or press Enter to exit.\n";
$input = <STDIN>;
if ($input =~ /c/i){
$flag = 1
}else{
exit;
}
}
}
sub bynumber{
$a <=> $b;
}