-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathphylo2owl.py
executable file
·165 lines (140 loc) · 4.96 KB
/
phylo2owl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
#!/usr/bin/env python
"""
phylo2owl.py: Convert phylogenies into OWL ontologies.
"""
import os.path
import sys
import argparse
import dendropy
import pystache
# Module metadata. __version__ is the string printed by the -v/--version
# command line option.
__version__ = "0.1"
__author__ = "Gaurav Vaidya"
__copyright__ = "Copyright 2016 The Phyloreferencing Project"
# List of supported formats
# Based on formats supported by DendroPy,
# see https://pythonhosted.org/DendroPy/schemas/index.html#specifying-the-data-source-format
# These are the only values accepted by the -f/--format option.
INPUT_FORMATS = ["newick", "nexus", "nexml"]
# Format used when -f/--format is not given on the command line.
DEFAULT_FORMAT = "newick"
def _build_argument_parser():
    """ Create the command line parser for the phylo2owl.py script """
    cmdline_parser = argparse.ArgumentParser(
        description="Convert phylogenies into OWL ontologies."
    )
    cmdline_parser.add_argument(
        'input_filename',
        metavar='input.tre',
        type=str,
        nargs='?',
        help='Phylogeny file to parse'
    )
    cmdline_parser.add_argument(
        '-f', '--format',
        dest='input_format',
        nargs='?',
        # With nargs='?' a bare "-f" stores `const`, which defaults to None
        # and is not checked against `choices`; fall back to the default
        # format instead of handing None to DendroPy.
        const=DEFAULT_FORMAT,
        type=str.lower,  # Lowercase input type name.
        choices=INPUT_FORMATS,
        default=DEFAULT_FORMAT,
        help='Input format (for input filename or standard input)'
    )
    cmdline_parser.add_argument(
        '-o',
        dest='output_filename',
        metavar='output.owl',
        type=str,
        help='Ontology file to output'
    )
    cmdline_parser.add_argument(
        '-n', '--name',
        dest='output_name',
        metavar='output_name',
        type=str,
        help='Name of the resource to emit'
    )
    cmdline_parser.add_argument(
        '-v', '--version',
        action='version',
        version='%(prog)s ' + __version__
    )
    cmdline_parser.add_argument(
        '--verbose',
        dest='flag_verbose',
        default=False,
        action='store_true',
        help='Display debugging information'
    )
    return cmdline_parser


def _assign_node_names(tree):
    """ Map every node on the tree to the name it will be written out under

    A node is named after its taxon label (with spaces replaced by
    underscores) if it has one, otherwise after its own label, otherwise
    it is given a generated name of the form 'Node_<n>'.
    """
    node_names = dict()
    node_count = 1
    for node in tree:
        name = node.label
        # A taxon without a label must not be dereferenced; fall through
        # to the node label or a generated name instead.
        if node.taxon and node.taxon.label is not None:
            name = node.taxon.label.replace(' ', '_')
        if name is None:
            name = 'Node_{0}'.format(node_count)
            node_count += 1
        node_names[node] = name
    return node_names


# Main function
def main():
    """ Main function for the phylo2owl.py script

    Reads a phylogeny from the input file named on the command line (or
    standard input), and writes it out as an ontology to the file given
    with -o (or standard output) by rendering the header, individual and
    footer templates.

    Exits with status 1 if the input cannot be parsed, and with status 0
    once the ontology has been written.
    """
    output_name = 'example'
    input_file = sys.stdin
    output_file = sys.stdout

    # Step 1. Parse command line arguments
    args = _build_argument_parser().parse_args()

    # Set up FLAG_VERBOSE.
    flag_verbose = args.flag_verbose

    # Files opened here (as opposed to stdin/stdout) so that they can be
    # closed again on every exit path, including sys.exit().
    opened_files = []
    try:
        # Step 2. Set up input and output streams.
        # Try opening the input file.
        if args.input_filename:
            input_file = open(args.input_filename, 'r')
            opened_files.append(input_file)

        # Figure out where the output should go, as well as the output name.
        if args.output_filename:
            output_file = open(args.output_filename, 'w')
            opened_files.append(output_file)
            output_name = os.path.splitext(os.path.basename(args.output_filename))[0]
        elif args.input_filename:
            output_name = os.path.splitext(os.path.basename(args.input_filename))[0]

        # But override output name if explicitly provided.
        if args.output_name:
            output_name = args.output_name

        # TODO: Make sure output_name is a valid entity name, probably as defined
        # here -- https://www.w3.org/TR/REC-xml/#NT-NameChar

        if flag_verbose:
            sys.stderr.write("Output name: {0}\n".format(output_name))
            sys.stderr.write("Input file: {0}\n".format(input_file))
            sys.stderr.write("Output file: {0}\n".format(output_file))

        # Step 3. Use DendroPy to read input tree.
        try:
            tree = dendropy.Tree.get(file=input_file, schema=args.input_format)
        except dendropy.utility.error.DataParseError as err:
            sys.stderr.write("Error: could not parse input!\n{0}\n".format(err))
            sys.exit(1)

        if flag_verbose:
            sys.stderr.write("Tree read successfully: {0}\n".format(tree))

        # Step 4. Set up pystache to read templates.
        # NOTE(review): the template paths below are relative, so presumably
        # the script has to be run from the directory that contains
        # templates/ -- confirm.
        render = pystache.Renderer(missing_tags='strict')

        # Step 5. Write out the header.
        xmlbase = "http://phyloinformatics.net/phylo/{0}".format(output_name)
        xmlns = xmlbase + '#'
        output_file.write(render.render_path('templates/header.txt', {
            'name': output_name,
            'xmlbase': xmlbase,
            'xmlns': xmlns
        }))

        # Step 6. Make a list of names for every node on this tree.
        node_names = _assign_node_names(tree)

        if flag_verbose:
            sys.stderr.write("Names assigned to {0} tree nodes:\n".format(len(node_names)))
            for (node, name) in node_names.items():
                sys.stderr.write(" - {0}: {1}\n".format(name, node))
            sys.stderr.write("\n")

        # Step 7. Write out each node on the tree.
        for node in tree:
            output_file.write(render.render_path('templates/individual.txt', {
                'xmlns': xmlns,
                'term': node_names[node],
                'name': output_name,
                'children': [{'child': node_names[n]} for n in node.child_nodes()],
                'siblings': [{'sibling': node_names[n]} for n in node.sibling_nodes()]
            }))

        # Step 8. Write out the footer.
        output_file.write(render.render_path('templates/footer.txt'))
    finally:
        for opened_file in opened_files:
            opened_file.close()

    sys.exit(0)
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()