-
Notifications
You must be signed in to change notification settings - Fork 0
/
Update_phylo_tree_json.py
146 lines (116 loc) · 4.43 KB
/
Update_phylo_tree_json.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#!/usr/bin/env python3.10
# # -*- coding: utf-8 -*-
"""Update a phylogenetic tree stored within a JSON file.

Part 1: Find the name of the node with the largest number of descendants.
Part 2: Load the JSON file, index it, search for the path of the target node,
        and return that path as a string.
Part 3: Adjust the path so that it can be used to add the key "clade" and the
        value "lII.b" to the "labels" dictionary found under the "branch_attrs"
        key of the children.
Part 4: Write the updated dictionary to a JSON file.
"""
import json
import pandas as pd
from ete3 import Tree
from benedict import benedict
# Part 1
def read_tree(newick_path="/path/to/tree.nwk"):
    """Load a Newick tree with ete3.

    Reading the tree with ete3 eases searching for the target node later on.

    Args:
        newick_path: Location of the Newick file. Defaults to the placeholder
            path the script was originally hard-coded with, so existing
            zero-argument calls keep working.

    Returns:
        The ``ete3.Tree`` built from the file.
    """
    # format=1 is handed to ete3 unchanged; per the ete3 documentation this
    # selects the Newick flavour that carries internal node names.
    return Tree(newick_path, format=1)
def create_dict(tree):
    """Map every node name of ``tree`` to the size reported by ``len(node)``.

    Args:
        tree: Object exposing ``traverse("postorder")`` that yields nodes with
            a ``name`` attribute and support for ``len()``.

    Returns:
        A 2-tuple ``(ordered_dic, ordered_list)`` where ``ordered_dic`` maps
        node name to count, sorted by count in descending order, and
        ``ordered_list`` holds the same counts, also in descending order.
    """
    # Nodes that share a name collapse into a single entry; the one visited
    # last in the post-order traversal wins.
    counts = {node.name: len(node) for node in tree.traverse("postorder")}
    ordered_dic = dict(
        sorted(counts.items(), key=lambda item: item[1], reverse=True)
    )
    # ordered_dic is already sorted by value, so its values are exactly the
    # descending list of counts; no second sort is needed.
    ordered_list = list(ordered_dic.values())
    return ordered_dic, ordered_list
def grab_target_node(dic, rank):
    """Return the element of ``dic`` stored at index (or key) ``rank``."""
    return dic[rank]
def get_name(dic, pos_value):
    """Return the key of ``dic`` whose value equals ``pos_value``.

    When several keys carry the same value, the first one in the dictionary's
    iteration order is returned.

    Args:
        dic: Mapping of node name to count.
        pos_value: Count to look for.

    Returns:
        The matching key.

    Raises:
        ValueError: If no value in ``dic`` equals ``pos_value``.
    """
    for key, value in dic.items():
        if pos_value == value:
            return key
    # Fail loudly here instead of letting a missing name surface later as an
    # unrelated error further down the pipeline.
    raise ValueError(f"no entry with value {pos_value!r} found")
# Part 2
def read_my_dict(json_path="/path/to/file.json"):
    """Load a JSON file into a ``benedict`` that uses "." as keypath separator.

    Loading the file with benedict preserves the dictionary format while
    leaving the keypath separator configurable. The separator is set to
    ``None`` for the initial load so that a dot inside any key or value does
    not raise an error (the default separator is a dot). After the keys have
    been standardized, the data is wrapped again with "." as separator so
    dotted keypaths can be used for lookups.

    Args:
        json_path: Location of the JSON file. Defaults to the placeholder path
            the script was originally hard-coded with.

    Returns:
        The loaded ``benedict`` with ``keypath_separator="."``.
    """
    loaded = benedict.from_json(json_path, keypath_separator=None)
    # NOTE(review): standardize() rewrites the keys in place (python-benedict
    # documents it as normalising key names) -- confirm that renaming keys is
    # acceptable for every key in the input file.
    loaded.standardize()
    return benedict(loaded, keypath_separator=".")
def index_dict(v):
    """Flatten a nested dict/list structure into ``[path, leaf]`` pairs.

    Dictionary keys are appended to the path as ``.key`` and list positions as
    ``[index]``, so every path built from a dictionary starts with a dot, for
    example ``.tree.children[0].name``. The brackets make it possible to
    address list items later on. Empty dicts and empty lists contribute no
    entry.

    The walk uses an explicit stack instead of recursion so that deeply nested
    trees cannot exhaust Python's recursion limit.

    Args:
        v: The structure to index; dicts and lists are descended into, any
            other value is treated as a leaf.

    Returns:
        A list of two-item lists ``[path, value]`` in depth-first order,
        following the iteration order of each dict and list.
    """
    flat = []
    pending = [("", v)]
    while pending:
        prefix, value = pending.pop()
        if isinstance(value, dict):
            children = [(f"{prefix}.{key}", child) for key, child in value.items()]
        elif isinstance(value, list):
            children = [(f"{prefix}[{pos}]", child) for pos, child in enumerate(value)]
        else:
            flat.append([prefix, value])
            continue
        # Push in reverse so the first child is popped (and emitted) first,
        # which keeps the same ordering as a recursive depth-first walk.
        pending.extend(reversed(children))
    return flat
def target_path(g, node_name):
    """Return the indexed path of the entry that contains ``node_name``.

    Args:
        g: List of ``[path, value]`` entries.
        node_name: Name of the target node.

    Returns:
        The first item (the path) of the first entry in which any item equals
        ``node_name``.

    Raises:
        ValueError: If no entry contains ``node_name``.
    """
    for entry in g:
        # Every item of the entry is compared, i.e. both the path and the
        # value, exactly as the nested loop did before.
        if any(item == node_name for item in entry):
            return entry[0]
    raise ValueError(f"node {node_name!r} not found in the indexed entries")
# Part 3
def prep_name(nome):
    """Turn the path of a ``name`` entry into its ``branch_attrs.labels`` path.

    The trailing ``name`` component is replaced by ``branch_attrs.labels`` and
    any dots at either end are removed, e.g.
    ``.tree.children[0].name`` -> ``tree.children[0].branch_attrs.labels``.

    Args:
        nome: Indexed path, expected to end with ``name``.

    Returns:
        The keypath string pointing at ``branch_attrs.labels``.
    """
    # removesuffix() drops only a literal trailing "name". str.strip("name")
    # would instead strip any of the characters n/a/m/e from BOTH ends and
    # could eat into the neighbouring path components.
    base = nome.removesuffix("name")
    return (base + "branch_attrs.labels").strip(".")
def update_dict(dic, path_name, key="new_key", value="new_value"):
    """Add ``key: value`` to the dictionary stored at ``dic[path_name]``.

    The update happens in place on the dictionary found at ``path_name``.

    Args:
        dic: Mapping that is indexed with ``path_name``.
        path_name: Key (or keypath) of the dictionary to update.
        key: Key to add; defaults to the original placeholder ``"new_key"``.
        value: Value to store; defaults to the original placeholder
            ``"new_value"``.

    Returns:
        Whatever the ``update`` call returns, which is ``None`` for
        dictionaries.

    Raises:
        KeyError: If ``path_name`` is not present in ``dic`` (raised by the
            lookup itself when ``dic`` is a plain dict).
    """
    return dic[path_name].update({key: value})
def conv_bene_write(n1, out_path="/path/to/updated_file.json"):
    """Write ``n1`` to ``out_path`` as JSON indented by one space.

    Auspice takes a very specific format, so the file is written with an
    indentation of 1. The indentation is requested directly from ``to_json``,
    which forwards keyword options to the JSON encoder. Writing the encoded
    text as-is keeps the nested structure intact; routing it through a pandas
    DataFrame would coerce the dictionary into a table.

    Args:
        n1: Dictionary object exposing ``to_json(**kwargs)`` that returns the
            JSON text (a ``benedict`` in this script).
        out_path: Destination file. Defaults to the placeholder path the
            script was originally hard-coded with.

    Returns:
        None.
    """
    serialized = n1.to_json(indent=1)
    with open(out_path, "w", encoding="utf-8") as handle:
        handle.write(serialized)
def main(rank=9):
    """Run the four parts of the update in order.

    Part 1. Pick the target node by its number of descendants:
        iterate over each node and count its leaves, store the node name and
        its count in a dictionary, order the counts in a descending list, take
        the count at position ``rank`` of that list and map it back to a node
        name.
    Part 2. Read and index the JSON file, then grab the path of the target
        node.
    Part 3. Prepare the path and update the dictionary at that path.
    Part 4. Write the updated dictionary to a JSON file.

    Args:
        rank: Zero-based position in the descending list of descendant counts
            used to select the target node. Defaults to 9, the value the
            script was originally hard-coded with.
    """
    # Part 1: node selected by its descendant count
    tree = read_tree()
    ordered_dic, ordered_list = create_dict(tree)
    num_desc = grab_target_node(ordered_list, rank)
    node_name = get_name(ordered_dic, num_desc)

    # Part 2: read and index the JSON file, locate the target node
    my_dic = read_my_dict()
    my_dic_idx = index_dict(my_dic)
    path = target_path(my_dic_idx, node_name)

    # Part 3: build the labels path and update the dictionary in place
    path = prep_name(path)
    update_dict(my_dic, path)

    # Part 4: write the updated dictionary to a JSON file
    conv_bene_write(my_dic)
# Run the pipeline only when executed as a script, so importing this module
# does not read or write any files.
if __name__ == "__main__":
    main()