Skip to content

Commit 32fe34e

Browse files
committed
Clade and branch-label names can be user-specified
Previously the `augur clades` command produced a node-data JSON which stored clade membership as the node-trait "clade_membership" and defined the basal nodes of each clade with the node-trait "clade_annotation". `augur export v2` interpreted the latter as a special case and produced a branch label with the same name. The previous commit allowed `augur export` to be supplied node-data JSONs with a `branch_labels` structure. Here we update `augur clades` to export data in this structure, which allows the user to specify the keys to use. To preserve backwards compatibility, if neither key is specified we default to trait-name="clade_membership" and label-name="clade", which will be exported from `augur export v2` correctly without needing any configuration changes. Closes #720
1 parent 997095f commit 32fe34e

File tree

2 files changed

+41
-13
lines changed

2 files changed

+41
-13
lines changed

augur/clades.py

+35-10
Original file line numberDiff line numberDiff line change
@@ -102,12 +102,9 @@ def assign_clades(clade_designations, all_muts, tree, ref=None):
102102
'''
103103

104104
clade_membership = {}
105+
basal_clade_nodes = {}
105106
parents = get_parent_name_by_child_name_for_tree(tree)
106107

107-
# first pass to set all nodes to unassigned as precaution to ensure attribute is set
108-
for node in tree.find_clades(order = 'preorder'):
109-
clade_membership[node.name] = {'clade_membership': 'unassigned'}
110-
111108
# count leaves
112109
for node in tree.find_clades(order = 'postorder'):
113110
node.leaf_count = 1 if node.is_terminal() else np.sum([c.leaf_count for c in node])
@@ -147,16 +144,40 @@ def assign_clades(clade_designations, all_muts, tree, ref=None):
147144
sorted_nodes = sorted(node_counts, key=lambda x: x.leaf_count, reverse=True)
148145
if len(sorted_nodes) > 0:
149146
target_node = sorted_nodes[0]
150-
clade_membership[target_node.name] = {'clade_annotation': clade_name, 'clade_membership': clade_name}
147+
basal_clade_nodes[target_node.name] = clade_name
148+
clade_membership[target_node.name] = clade_name # basal nodes are members of the clade
151149

152150
# third pass to propagate 'clade_membership'
153151
# don't propagate if encountering 'clade_annotation'
154152
for node in tree.find_clades(order = 'preorder'):
155153
for child in node:
156-
if 'clade_annotation' not in clade_membership[child.name]:
157-
clade_membership[child.name]['clade_membership'] = clade_membership[node.name]['clade_membership']
154+
# if the child doesn't define the start of its own clade, but the parent belongs to a clade, then inherit that membership
155+
if child.name not in basal_clade_nodes and node.name in clade_membership:
156+
clade_membership[child.name] = clade_membership[node.name]
157+
return (basal_clade_nodes, clade_membership)
158+
158159

159-
return clade_membership
160+
def create_node_data_structure(basal_clade_nodes, clade_membership, args):
161+
node_data = {}
162+
163+
if (not args.label_name and not args.trait_name):
164+
print("WARNING: running `augur clades` without specifying --label-name and/or")
165+
print(" --trait-name is deprecated. To preserve backwards compatibility")
166+
print(" we will use 'clade' and 'clade_membership', respectively.")
167+
print(" (Note that 'clade' is now exported as a 'branch_label')")
168+
169+
label_name = "clade"
170+
trait_name = "clade_membership"
171+
else:
172+
label_name = args.label_name
173+
trait_name = args.trait_name
174+
175+
if trait_name:
176+
node_data['nodes'] = {node: {trait_name: clade} for node,clade in clade_membership.items()}
177+
if label_name:
178+
node_data['branch_labels'] = {node: {label_name: clade} for node,clade in basal_clade_nodes.items()}
179+
180+
return node_data
160181

161182

162183
def get_reference_sequence_from_root_node(all_muts, root_name):
@@ -181,6 +202,8 @@ def register_arguments(parser):
181202
parser.add_argument('--mutations', nargs='+', help='JSON(s) containing ancestral and tip nucleotide and/or amino-acid mutations ')
182203
parser.add_argument('--reference', nargs='+', help='fasta files containing reference and tip nucleotide and/or amino-acid sequences ')
183204
parser.add_argument('--clades', type=str, help='TSV file containing clade definitions by amino-acid')
205+
parser.add_argument('--trait-name', type=str, help='name to use to store clade membership (set for every node belonging to the clade)')
206+
parser.add_argument('--label-name', type=str, help='name to use for branch labels (set on basal branches for each clade)')
184207
parser.add_argument('--output-node-data', type=str, help='name of JSON file to save clade assignments to')
185208

186209

@@ -205,8 +228,10 @@ def run(args):
205228

206229
clade_designations = read_in_clade_definitions(args.clades)
207230

208-
clade_membership = assign_clades(clade_designations, all_muts, tree, ref)
231+
(basal_clade_nodes, clade_membership) = assign_clades(clade_designations, all_muts, tree, ref)
232+
233+
node_data = create_node_data_structure(basal_clade_nodes, clade_membership, args)
209234

210235
out_name = get_json_name(args)
211-
write_json({'nodes': clade_membership}, out_name)
236+
write_json(node_data, out_name)
212237
print("clades written to", out_name, file=sys.stdout)

augur/export_v2.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -92,9 +92,9 @@ def are_mutations_defined(node_attrs):
9292
return False
9393

9494

95-
def are_clades_defined(node_attrs):
95+
def is_clade_membership_defined(node_attrs):
9696
for node, data in node_attrs.items():
97-
if data.get("clade_membership") or data.get("clade_annotation"):
97+
if data.get("clade_membership"):
9898
return True
9999
return False
100100

@@ -291,7 +291,7 @@ def _get_colorings():
291291
colorings.insert(0,{'key':'gt'})
292292
if "num_date" not in explicitly_defined_colorings and are_dates_defined(node_attrs):
293293
colorings.insert(0,{'key':'num_date'})
294-
if "clade_membership" not in explicitly_defined_colorings and are_clades_defined(node_attrs):
294+
if "clade_membership" not in explicitly_defined_colorings and is_clade_membership_defined(node_attrs):
295295
colorings.insert(0,{'key':'clade_membership'})
296296

297297
return colorings
@@ -851,6 +851,9 @@ def transfer_mutations_to_branches(node_attrs, branch_attrs):
851851
branch_attrs[node_name]["labels"] = { "aa": aa_lab }
852852

853853
def transfer_clade_annotation_to_branches(node_attrs, branch_attrs):
854+
# NOTE: storing branch labels as `clade_annotation` is only possible
855+
# using an older version of augur. (`augur clades` no longer uses this).
856+
# This function should be removed upon the next major augur release (currently v12)
854857
for node_name, raw_data in node_attrs.items():
855858
if "clade_annotation" in raw_data and is_valid(raw_data["clade_annotation"]):
856859
if node_name not in branch_attrs:

0 commit comments

Comments
 (0)