-
Notifications
You must be signed in to change notification settings - Fork 1
/
tss_dedupe_uni.pl
50 lines (43 loc) · 1.51 KB
/
tss_dedupe_uni.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#!/usr/bin/perl
# script to deduplicate and split TSS to bidirectional and unidirectional groups
# e.g. perl tss_dedupe_uni.pl TSS_hg38_gencode > TSS_hg38_gencode.uni.skipped
# (C) Yuri Kravatsky, [email protected]
use strict;
use warnings;

# Filter the unidirectional TSS list against the bidirectional one.
#
# Usage: perl tss_dedupe_uni.pl TSSfilemask
#   reads  TSSfilemask.bidi.sgr   - every gene name found here is remembered
#   reads  TSSfilemask.uni.sgr    - records to be filtered
#   writes TSSfilemask.uni.dedupe - uni records whose gene is NOT in the bidi file
#   STDOUT                        - uni records whose gene IS in the bidi file
#
# Records are tab-separated and the gene name is taken from the 4th column,
# e.g. "chr1<TAB>1000097<TAB>-<TAB>HES4". In both inputs, lines shorter than
# 5 characters and comment lines (optional whitespace followed by '#') are
# ignored and are not copied to any output.

if (@ARGV != 1) { die "Usage: tssdedupe TSSfilemask\n"; }

my $tssfilemask = $ARGV[0];
my $infile      = $tssfilemask . ".bidi.sgr";
my $unifile     = $tssfilemask . ".uni.sgr";
my $outfile     = $tssfilemask . ".uni.dedupe";

# Return the gene name (4th tab-separated column) of an already chomp-ed
# record line, or undef when the line has to be ignored (too short or a
# comment). A record without a 4th column yields the empty string, so such
# records share a single key, exactly as they did before warnings were on.
sub gene_of_record {
    my ($line) = @_;

    return if length($line) < 5;
    return if $line =~ m/^(\s+)?#/;

    my @fields = split /\t/, $line;
    my $gene = defined $fields[3] ? $fields[3] : '';

    # chomp only removes "\n"; drop a stray CR so that an LF file and a CRLF
    # file still agree on the gene key. The record itself is left untouched.
    $gene =~ s/\r\z//;

    return $gene;
}

# Pass 1: collect the gene names present in the bidirectional file.
my %TSS;
open(my $bidi_fh, '<', $infile)
    or die "Can't open \"$infile\" for reading: $!";
while (my $line = <$bidi_fh>) {
    chomp $line;
    my $gene = gene_of_record($line);
    next unless defined $gene;
    $TSS{$gene} = 1;
}
close $bidi_fh;

# Pass 2: split the unidirectional records. The input is opened before the
# output is created so that a missing input does not truncate an existing
# result file.
open(my $uni_fh, '<', $unifile)
    or die "Can't open \"$unifile\" for reading: $!";
open(my $out_fh, '>', $outfile)
    or die "Can't create \"$outfile\": $!";
while (my $line = <$uni_fh>) {
    chomp $line;
    my $gene = gene_of_record($line);
    next unless defined $gene;

    if (exists $TSS{$gene}) {
        # Gene is also bidirectional: report the skipped record on STDOUT.
        print "$line\n";
        next;
    }
    print {$out_fh} "$line\n";
}
close $uni_fh;

# Buffered write errors (e.g. disk full) only surface when the handle is closed.
close($out_fh) or die "Can't close \"$outfile\": $!";