Skip to content

Commit 4d05cab

Browse files
committed
update augur clades, add tests
Commit should be squashed after review
1 parent 32fe34e commit 4d05cab

7 files changed

+155
-29
lines changed

augur/clades.py

+9-29
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ def assign_clades(clade_designations, all_muts, tree, ref=None):
133133
node.sequences[gene][pos] = d
134134

135135

136-
# second pass to assign 'clade_annotation' to basal nodes within each clade
136+
# store names of basal nodes of each clade in `basal_clade_nodes` and `clade_membership` dicts.
137137
# if multiple nodes match, assign annotation to largest
138138
# otherwise occasional unwanted cousin nodes get assigned the annotation
139139
for clade_name, clade_alleles in clade_designations.items():
@@ -147,7 +147,7 @@ def assign_clades(clade_designations, all_muts, tree, ref=None):
147147
basal_clade_nodes[target_node.name] = clade_name
148148
clade_membership[target_node.name] = clade_name # basal nodes are members of the clade
149149

150-
# third pass to propagate 'clade_membership'
150+
# propagate 'clade_membership' to children nodes
151151
# don't propagate if encountering 'clade_annotation'
152152
for node in tree.find_clades(order = 'preorder'):
153153
for child in node:
@@ -157,29 +157,6 @@ def assign_clades(clade_designations, all_muts, tree, ref=None):
157157
return (basal_clade_nodes, clade_membership)
158158

159159

160-
def create_node_data_structure(basal_clade_nodes, clade_membership, args):
161-
node_data = {}
162-
163-
if (not args.label_name and not args.trait_name):
164-
print("WARNING: running `augur clades` without specifying --label-name and/or")
165-
print(" --trait-name is deprecated. To preserve backwards compatibility")
166-
print(" we will use 'clade' and 'clade_membership', respectively.")
167-
print(" (Note that 'clade' is now exported as a 'branch_label')")
168-
169-
label_name = "clade"
170-
trait_name = "clade_membership"
171-
else:
172-
label_name = args.label_name
173-
trait_name = args.trait_name
174-
175-
if trait_name:
176-
node_data['nodes'] = {node: {trait_name: clade} for node,clade in clade_membership.items()}
177-
if label_name:
178-
node_data['branch_labels'] = {node: {label_name: clade} for node,clade in basal_clade_nodes.items()}
179-
180-
return node_data
181-
182-
183160
def get_reference_sequence_from_root_node(all_muts, root_name):
184161
# attach sequences to root
185162
ref = {}
@@ -202,8 +179,7 @@ def register_arguments(parser):
202179
parser.add_argument('--mutations', nargs='+', help='JSON(s) containing ancestral and tip nucleotide and/or amino-acid mutations ')
203180
parser.add_argument('--reference', nargs='+', help='fasta files containing reference and tip nucleotide and/or amino-acid sequences ')
204181
parser.add_argument('--clades', type=str, help='TSV file containing clade definitions by amino-acid')
205-
parser.add_argument('--trait-name', type=str, help='name to use to store clade membership (set for every node belonging to the clade)')
206-
parser.add_argument('--label-name', type=str, help='name to use for branch labels (set on basal branches for each clade)')
182+
parser.add_argument('--attribute-name', type=str, default="clade", help="name to use for clade membership & branch labels", required=False)
207183
parser.add_argument('--output-node-data', type=str, help='name of JSON file to save clade assignments to')
208184

209185

@@ -230,8 +206,12 @@ def run(args):
230206

231207
(basal_clade_nodes, clade_membership) = assign_clades(clade_designations, all_muts, tree, ref)
232208

233-
node_data = create_node_data_structure(basal_clade_nodes, clade_membership, args)
209+
# create node_data for export as a JSON
210+
node_data = {
211+
'nodes': {node: {args.attribute_name: clade} for node,clade in clade_membership.items()},
212+
'branch_labels': {node: {args.attribute_name: clade} for node,clade in basal_clade_nodes.items()}
213+
}
234214

235215
out_name = get_json_name(args)
236216
write_json(node_data, out_name)
237-
print("clades written to", out_name, file=sys.stdout)
217+
print(f"clades written to {out_name} using attribute name {args.attribute_name}", file=sys.stdout)

tests/functional/clades.t

+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
Integration tests for augur clades.
2+
3+
$ pushd "$TESTDIR" > /dev/null
4+
$ export AUGUR="../../bin/augur"
5+
6+
Run augur clades without --attribute-name. We expect the name to be "clade"
7+
8+
$ ${AUGUR} clades \
9+
> --tree clades/tree.nwk \
10+
> --clades clades/clades.tsv \
11+
> --mutations clades/nt_muts.json \
12+
> --output-node-data "$TMP/default.json" > /dev/null
13+
14+
$ diff -u "clades/expected-output-default.json" "$TMP/default.json"
15+
(This diff will fail as soon as the augur version changes, because the version string is written into the JSON... is there a better JSON diff tool to use?)
16+
17+
Run augur clades with a custom --attribute-name
18+
19+
$ ${AUGUR} clades \
20+
> --tree clades/tree.nwk \
21+
> --clades clades/clades.tsv \
22+
> --mutations clades/nt_muts.json \
23+
> --attribute-name custom \
24+
> --output-node-data "$TMP/custom-attr.json" > /dev/null
25+
26+
$ diff -u "clades/expected-output-custom-attr.json" "$TMP/custom-attr.json"
27+
(This diff will fail as soon as the augur version changes, because the version string is written into the JSON... is there a better JSON diff tool to use?)
28+
29+
# Ensure the only change between runs of `augur clades` is the attr name used
30+
$ cat "$TMP/default.json" | sed "s/clade/custom/" > "$TMP/default-now-custom.json"
31+
$ diff -u "$TMP/default-now-custom.json" "$TMP/custom-attr.json"
32+
33+
Cleanup
34+
$ rm -f "$TMP/default.json" "$TMP/custom-attr.json" "$TMP/default-now-custom.json"

tests/functional/clades/clades.tsv

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
clade gene site alt
2+
3+
# the 1b mutation appears only once, on the branch leading to tips B and C
4+
# thus we expect the clade label to be on node `internalBC`
5+
cladeCB nuc 1 B
6+
# the 2c mutation appears twice -- on branch `internalBC` and `internalDEF`
7+
# as the latter has 3 descendants, it is chosen over the former
8+
cladeDEF nuc 2 C
9+
# mutation 3e appears only on a terminal node (tipE)
10+
# but we still expect both a branch label and a node_attr
11+
# this means that tipE should be annotated "cladeE" and _not_ "cladeDEF"
12+
cladeE nuc 3 E
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
{
2+
"branch_labels": {
3+
"internalBC": {
4+
"custom": "cladeCB"
5+
},
6+
"internalDEF": {
7+
"custom": "cladeDEF"
8+
},
9+
"tipE": {
10+
"custom": "cladeE"
11+
}
12+
},
13+
"generated_by": {
14+
"program": "augur",
15+
"version": "12.0.0"
16+
},
17+
"nodes": {
18+
"internalBC": {
19+
"custom": "cladeCB"
20+
},
21+
"internalDEF": {
22+
"custom": "cladeDEF"
23+
},
24+
"tipB": {
25+
"custom": "cladeCB"
26+
},
27+
"tipC": {
28+
"custom": "cladeCB"
29+
},
30+
"tipD": {
31+
"custom": "cladeDEF"
32+
},
33+
"tipE": {
34+
"custom": "cladeE"
35+
},
36+
"tipF": {
37+
"custom": "cladeDEF"
38+
}
39+
}
40+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
{
2+
"branch_labels": {
3+
"internalBC": {
4+
"clade": "cladeCB"
5+
},
6+
"internalDEF": {
7+
"clade": "cladeDEF"
8+
},
9+
"tipE": {
10+
"clade": "cladeE"
11+
}
12+
},
13+
"generated_by": {
14+
"program": "augur",
15+
"version": "12.0.0"
16+
},
17+
"nodes": {
18+
"internalBC": {
19+
"clade": "cladeCB"
20+
},
21+
"internalDEF": {
22+
"clade": "cladeDEF"
23+
},
24+
"tipB": {
25+
"clade": "cladeCB"
26+
},
27+
"tipC": {
28+
"clade": "cladeCB"
29+
},
30+
"tipD": {
31+
"clade": "cladeDEF"
32+
},
33+
"tipE": {
34+
"clade": "cladeE"
35+
},
36+
"tipF": {
37+
"clade": "cladeDEF"
38+
}
39+
}
40+
}

tests/functional/clades/nt_muts.json

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
{
2+
"nodes": {
3+
"tipA": {"muts": [], "aa_muts": {}},
4+
"tipB": {"muts": [], "aa_muts": {}},
5+
"tipC": {"muts": [], "aa_muts": {}},
6+
"tipD": {"muts": [], "aa_muts": {}},
7+
"tipE": {"muts": ["A3E"], "aa_muts": {}},
8+
"tipF": {"muts": [], "aa_muts": {}},
9+
"internalBC": {
10+
"muts": ["A1B", "A2C"],
11+
"aa_muts": {}
12+
},
13+
"internalDEF": {
14+
"muts": ["A2C"],
15+
"aa_muts": {}
16+
},
17+
"ROOT":{"muts": [], "aa_muts": {}}
18+
}
19+
}

tests/functional/clades/tree.nwk

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
(tipA:1,(tipB:1,tipC:1)internalBC:2,(tipD:3,tipE:4,tipF:1)internalDEF:5)ROOT:1;

0 commit comments

Comments
 (0)