@@ -64,6 +64,7 @@ def __init__(self, nodes_filename, names_filename):
64
64
# "Memory" data structure to be populated at function calls
65
65
# For a faster response in case the same query is asked again
66
66
self .lineages = {}
67
+ self .distances = {}
67
68
self .lca_mappings = {}
68
69
69
70
# Add nodes to self.taxonomy
@@ -293,6 +294,77 @@ def get_distance(self, tax_id_ancestor, tax_id):
293
294
294
295
return distance
295
296
297
def get_distance_generic(self, tax_id_1, tax_id_2):
    """
    Return the number of edges between tax_id_1 and tax_id_2.

    Fnc get_distance() only handles the ancestor -> descendant case. This
    fnc covers the general case: the two tax_ids may sit anywhere in the
    tree relative to each other. The path between them goes through their
    lowest common ancestor (LCA), so the total distance is the sum of the
    distances from the LCA to each of the two tax_ids.

    Results are memoized in self.distances, keyed as
    self.distances[smaller_tax_id][larger_tax_id], so that a repeated
    query (in either argument order) skips the LCA lookup.

    Args:
        tax_id_1: first tax_id.
        tax_id_2: second tax_id.

    Returns:
        The sum self.get_distance(lca, tax_id_1) +
        self.get_distance(lca, tax_id_2), where lca is
        self.get_lca(tax_id_1, tax_id_2).
    """
    # The distance is symmetric, so normalize the pair: the smallest
    # tax_id is always the outer key in self.distances.
    tax_id_small = min(tax_id_1, tax_id_2)
    tax_id_large = max(tax_id_1, tax_id_2)

    # Fetch (or create) the cache row for the smaller tax_id without
    # discarding distances already stored in it.
    cached_row = self.distances.setdefault(tax_id_small, {})
    distance = cached_row.get(tax_id_large)

    # Compare against None explicitly: a cached distance of 0 is valid.
    if distance is None:

        # Lowest common ancestor
        lca = self.get_lca(tax_id_1, tax_id_2)

        # Sum of distances between both tax_ids and the LCA makes the
        # total distance
        distance_1 = self.get_distance(lca, tax_id_1)
        distance_2 = self.get_distance(lca, tax_id_2)
        distance = distance_1 + distance_2

        # Remember the result for the next time the same pair is asked for
        cached_row[tax_id_large] = distance

    return distance
367
+
296
368
def get_rank (self , tax_id_list ):
297
369
"""
298
370
Returns the rank of each tax_id.
0 commit comments