-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathribosomeFilter.sh
executable file
·117 lines (96 loc) · 2.82 KB
/
ribosomeFilter.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#!/bin/sh
# blastn -query -o -outfmt 6 qseqid title sblastnames -value 0.001 -num_threads 10 -db /scratch/blastdb/nt
# usage ribosomeFilter.sh -f multifata -b blast.out
echo "Running from $HOSTNAME";
help_msg="Removes all the ribosomal sequences based on blast output,\n
\n
Usage:\n
$(basename "$0") [-h] -f <multifasta.fa> [-o output.fasta] [-b blast out]\n
\n
where:\n
-h shows help message.\n
-f input fasta to be filtered.\n
-o output fasta filename (default: <name>-ribofiltered.fa).\n
-b -outfmt 6 of blastn.\n
-t Number of threads to use (default: 1)."
# defining options
while getopts ":h:f:o:b:t:" option; do
case "$option" in
h)
echo -e $help_msg >&2;
exit;
;;
f)
INFILE=$OPTARG >&2;
echo Input fasta: $INFILE;
if [[ ! -e $INFILE ]]; then
echo "Input file doesn't exist!";
exit;
fi
filename=$(basename -- "$INFILE");
filename="${filename%.*}";
OUTFILE="$filename-ribofiltered.fa";
;;
o)
OUTFILE=${OPTARG:-$OUTFILE}; >&2;
;;
b)
BLAST=${OPTARG:-blastout} >&2;
;;
t)
PTHREAD=${OPTARG:-1} >&2;
;;
\?)
echo "ERROR: Invalid option. -$OPTARG" >&2;
echo -e $help_msg;
exit 1;
;;
:)
echo "ERROR: Option -$OPTARG requires an argument." >&2;
echo -e $help_msg;
exit 1;
;;
*)
echo "ERROR: No such option: -$OPTARG" >&2;
exit 1;
;;
esac
done
if ((OPTIND == 1))
then
echo "ERROR: No options specified";
exit 1;
fi
echo "Output fasta: $OUTFILE";
# blastn -query $INFILE -o blastout -outfmt "6 stitle sblastnames scomnames" -evalue 0.001 -num_threads 5 -db /scratch/blastdbt
InName=( $(awk '{print $1}' $BLAST | uniq) );
#awk '{print $1}' $BLAST | uniq | head -n 100 > tmpname
let "counter=0"
echo "Blast file: $BLAST";
#grepper() {
# if [ $1 == DROUGHT* ]; then
# counter=$((counter+1))
# fi
# if [ ! -z `grep -qi "$1.*(ribosom|protein).*" $BLAST` ]
# then
# else
# echo $1 >> tmpFilterName;
# fi
#}
for name in "${InName[@]}"; do
if grep -Eqi "$name.*(protein|ribosom).*" $BLAST; then
((counter++))
else
echo $name >> tmpFilterName;
fi
done
#export -f grepper
#parallel --bar -j $PTHREAD -k grepper ::: "${InName[@]}";
#parallel --bar -j $PTHREAD -k grepper :::: tmpname
xargs samtools faidx /2/scratch/lucy/misc/2transcripts.fa < tmpFilterName > ribosomPostFilter.fa;
if [ -e tmpFilterName ]; then
rm tmpFilterName;
fi
echo "Complete!";
echo "$counter genes were filtered!";
echo "(╯°□°)╯︵ ┻━┻";