|
4 | 4 |
|
5 | 5 | 3.1-merge_timepoints.pl |
6 | 6 |
|
7 | | - This script uses USearch to recluster selected sequences from multiple timepoints |
| 7 | + This script uses vsearch to recluster selected sequences from multiple timepoints |
8 | 8 | to determine "birthdays" and persistence times. It is recommended to use |
9 | 9 | custom prefixes that correspond to actual time points from longitudinal |
10 | 10 | samples, whether relative (weeks or months post infection) or absolute |
|
24 | 24 | Defaults to "01", "02", etc and will be prepended with a |
25 | 25 | dash separating it from the original sequence name. Dashes |
26 | 26 | are not allowed in the labels themselves, to ease processing. |
27 | | - -c => Use USearch's cluster_fast command instead of derep_fulllength. |
| 27 | + -c => Use vsearch's cluster_fast command instead of derep_fulllength. |
28 | 28 | Allows fragments missing a few AA at either end to be |
29 | 29 | counted as the same as a full-length sequence. |
30 | 30 | --t => Clustering threshold to use. Not settable when using the |
|
137 | 137 |
|
138 | 138 |
|
139 | 139 |
|
140 | | -# run USearch |
| 140 | +# run vsearch |
141 | 141 | my $cmd = ppath() . "vsearch -derep_fulllength work/phylo/all_seqs.fa -uc work/phylo/uc"; |
142 | 142 | if ($clustFast){ |
143 | 143 | # -maxgaps parameter treats sequences unique except for an indel as distinct |
144 | | - # need to sort by length because of possible fragments |
145 | | - $cmd = ppath() . "vsearch -cluster_fast work/phylo/all_seqs.fa -sort length -id $threshold -uc work/phylo/uc -maxgaps 2"; |
| 144 | + # vsearch automatically sorts by length, accounting for possible fragments |
| 145 | + $cmd = ppath() . "vsearch -cluster_fast work/phylo/all_seqs.fa -id $threshold -uc work/phylo/uc -maxgaps 2"; |
146 | 146 | } |
147 | 147 | print "$cmd\n"; |
148 | 148 | system($cmd); |
149 | 149 |
|
150 | 150 |
|
151 | 151 |
|
152 | | -#Parse USearch output (sample lines below) |
| 152 | +#Parse vsearch output (sample lines below) |
153 | 153 | #S 0 348 * . * * * 00-000154 * |
154 | 154 | #H 0 348 100.0 . 0 348 = 00-000180 00-000154 |
155 | 155 | #C 0 18258 * * * * * 00-000154 * |
156 | 156 |
|
157 | 157 | my %cluster; |
158 | | -open UC, "work/phylo/uc" or die "Can't find output from USearch: $!. Please check parameters.\n"; |
| 158 | +open UC, "work/phylo/uc" or die "Can't find output from vsearch: $!. Please check parameters.\n"; |
159 | 159 | while (<UC>) { |
160 | 160 | last if /^C/; #speed processing by skipping summary lines |
161 | 161 |
|
|
0 commit comments