@@ -212,29 +212,34 @@ def main():
212212 aln = aln [ :, 0 :- extra ]
213213 align_len -= extra
214214
215- #kill the fasta def line and any usearch/vsearch annotations to avoid formatting foul-ups
216215 germ_id = ""
216+ if arguments ["-v" ] is not None :
217+ germ_id = germ_seq .id
218+ elif arguments ['--root' ] is not None :
219+ germ_id = arguments ['--root' ]
220+
217221 foundRoot = False
218222 gaps = defaultdict ( list )
219223 for seq in aln :
224+ #kill the fasta def line and any usearch/vsearch annotations to avoid formatting foul-ups
220225 seq .id = re .sub ("[;:].*" , "" , seq .id )
221226 seq .description = ""
222- if re .search ("(IG|VH|VK|VL|HV|KV|LV)" , seq .id , re .I ) is not None :
223- germ_id = seq .id
224-
225- if arguments ['--root' ] is not None and seq .id == arguments ['--root' ]:
227+ if germ_id == "" :
228+ if re .search ("(IG|VH|VK|VL|HV|KV|LV)" , seq .id , re .I ) is not None :
229+ germ_id = seq .id
230+ print ( f"Using { germ_id } as root sequence. If this is not correct, please run again with the `--root` option specified." )
231+ foundRoot = True
232+ elif seq .id == germ_id :
226233 foundRoot = True
227234
228235 for g in re .finditer ("-+" , str (seq .seq )):
229236 #save gap. value is a field to help me determine what's real in assignGaps
230237 gaps [ seq .id .upper () ].append ( {'start' :g .start (), 'end' :g .end (), 'value' :1 } )
231238
232- if arguments ['--root' ] is not None :
233- germ_id = arguments ['--root' ]
234- if not foundRoot :
235- sys .exit ("Couldn't find specified root sequence %s in input file" % arguments ['--root' ])
236- elif germ_id == "" :
239+ if germ_id == "" :
237240 sys .exit ("Couldn't find a germline gene in the alignment, please use the --root option and try again." )
241+ elif not foundRoot :
242+ sys .exit ("Couldn't find specified root sequence %s in input file" % germ_id )
238243
239244 with open ("%s/infile" % prj_tree .phylo , "w" ) as output :
240245 AlignIO .write (aln , output , "fasta" )
0 commit comments