Skip to content

Commit

Permalink
#321 adding some documentation to query kegg and clearing path between query kegg and kegg conversion
Browse files Browse the repository at this point in the history
  • Loading branch information
ecwood committed Jul 27, 2023
1 parent 1e68baf commit de5d0a5
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 11 deletions.
25 changes: 14 additions & 11 deletions kegg_json_to_kg_jsonl.py
Original file line number Diff line number Diff line change
Expand Up @@ -463,13 +463,17 @@ def process_enzyme(enzyme_dict, kegg_id, nodes_output, edges_output, update_date
edges_output.write(format_kegg_edge(node_id, pathway, update_date))


def make_kg2_graph(kegg, nodes_output, edges_output, update_date):
version_number = kegg['info']['version']
version_date = kegg['info']['update_date']
for kegg_id in kegg:
def make_kg2_graph(input_kegg, nodes_output, edges_output, update_date):
version_number = "TEMP"
version_date = "TEMP"
for kegg_input_dict in input_kegg:
for single_item in kegg_input_dict:
kegg_id = single_item
if kegg_id == 'info':
version_number = kegg_input_dict[kegg_id]['version']
version_date = kegg_input_dict[kegg_id]['update_date']
continue
kegg_dict = kegg[kegg_id]
kegg_dict = kegg_input_dict[kegg_id]
if KEGG_COMPOUND_PREFIX.match(kegg_id) is not None:
process_compound(kegg_dict, kegg_id, nodes_output, edges_output, update_date)

Expand Down Expand Up @@ -505,17 +509,16 @@ def make_kg2_graph(kegg, nodes_output, edges_output, update_date):
output_edges_file_name = args.outputEdgesFile
test_mode = args.test

input_jsonlines_info = kg2_util.start_read_jsonlines(input_file_name)
input_kegg = input_jsonlines_info[0]

nodes_info, edges_info = kg2_util.create_kg2_jsonlines(test_mode)
nodes_output = nodes_info[0]
edges_output = edges_info[0]

kegg = dict()
with open(input_file_name, 'r') as kegg_file:
update_date = kg2_util.convert_date(os.path.getmtime(input_file_name))
kegg = json.load(kegg_file)

make_kg2_graph(kegg, nodes_output, edges_output, update_date)
make_kg2_graph(input_kegg, nodes_output, edges_output, update_date)

kg2_util.end_read_jsonlines(input_jsonlines_info)
kg2_util.close_kg2_jsonlines(nodes_info, edges_info, output_nodes_file_name, output_edges_file_name)

print("Finish time: ", date())
4 changes: 4 additions & 0 deletions query_kegg.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,10 +105,12 @@ def create_query_lists(kegg_id_dict, num_threads):

def create_threads(num_threads, output_writer):
kegg_id_dict, info_dict = preliminary_queries()
output_writer.write({"info": info_dict})
query_lists = create_query_lists(kegg_id_dict, num_threads)

threads = list()
print("Number of queriers: ", len(query_lists))
print("Starting at", kg2_util.date())
for kegg_querier, query_dict in query_lists:
print(kegg_querier.name + ": " + str(len(query_dict)))
thread = threading.Thread(target=kegg_querier.run_set_of_queries, args=(query_dict,))
Expand Down Expand Up @@ -169,6 +171,8 @@ def run_set_of_queries(self, kegg_id_dict):

for kegg_id in kegg_id_dict:
previous_line_starter = ''

# If we have a connection issue (which will cause a parsing error), spin until it works, but put a note in the log
while True:
try:
results = send_query(get_base_query + kegg_id)
Expand Down

0 comments on commit de5d0a5

Please sign in to comment.