-
Notifications
You must be signed in to change notification settings - Fork 0
/
candidates_to_design.py
168 lines (154 loc) · 6.48 KB
/
candidates_to_design.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
#!/usr/local/bin/python3
# Copyright (C) 2020 TeselaGen Biotechnology, Inc.
# License: MIT
import math
import json
import textwrap
from pathlib import Path
from typing import List
import argparse
import sys
import pandas as pd
"""
This module provides a command line tool that generates a json design
that can be imported into TeselaGen's DESIGN module. As an input, this
tool uses a csv data table as the one can be exported from the *evolutions*
algorithm's result, at EVOLVE. Some usage examples can be found below
"""
def build_design_from_candidates(
candidates_data: List[dict],
bin_cols: List[str],
#lab_id: str,
#user_id: str,
name: str)->dict:
''' Builds design dict from candidates data
Args:
candidates_data: Records object from EVOLVE output data csv
bin_cols: List containing the names of the bins to be read from data
name: Name of design as will be seen on DESIGN.
Returns:
A dict that contains design information. This dict can be serialized as
json to be used within the DESIGN module.
'''
designs = {'design':{}}
# Use just the elements with valid priority (suggested candidates)
data = [d for d in candidates_data if not math.isnan(d['Priority'])]
print(f"Generating design using {len(data)} candidates")
# Create bins
designs['design']['bin'] = {f"{i+1}":{'id':f"{i+1}",
'direction': True,
'name': name,
'iconId':"1"} for i,name in enumerate(bin_cols)}
# Create Bincards
designs['design']['binCard'] = {f"{i+1}":{'id': f"{i+1}",
'index': i,
'cardId': "1",
'binId': f"{i+1}"} for i, name in enumerate(bin_cols)}
# Create card
designs['design']['card'] = {"1": {"id": "1",
"isRoot": True,
"circular": True,
"name": "Target Construct"}}
# Create design
designs['design']['design'] = {"1":{"id": "1",
#"userId": user_id,
#"labId": lab_id,
"name": name,
"type": "grand-design",
"layoutType": "list",
"numRows": len(data)}}
# Create elements
designs['design']['element'] = {}
el_i = 0
for i_bin, bin_name in enumerate(bin_cols):
for i_el, el in enumerate(data):
el_i += 1
designs['design']['element'][f"{el_i}"] = {
"id": f"{el_i}",
"name": el[bin_name],
"index": i_el,
"binId": f"{i_bin+1}"}
# Set icon
designs['design']['icon'] = {"1": {"id": "1",
"isSbol": True,
"name": "USER-DEFINED",
"path": "M 5 40 L 5 60 L 45 60 L 45 40 Z"}}
# Maybe this is not needed
designs["hdeDesignExportVersion"] = 3
#print("Done!")
return designs
def candidates_to_design(input_file: str, bin_cols: List[str], name: str, output: str):
''' Read candidates file (csv) and generates json file that can be read from DESIGN
module.
Args:
input_file: Path to the input csv file as string.
bin_cols: A list with the names of the bin columns at the csv file
name: Name of output design
output: Path of the output file as string.
'''
# Get arguments
if 'csv' in input_file:
# Loads data from a csv file downloaded from EVOLVE UI
data_df = pd.read_csv(input_file)
data = data_df.to_dict(orient="records")
# Build dictionary object with design
design_dict = build_design_from_candidates(
candidates_data=data,#: List[dict],
bin_cols= bin_cols,
name= name)
# Write design to disc
#Path('_export/').mkdir(parents=True, exist_ok=True)
with open(output, 'w') as f:
json.dump(design_dict, f)
print(f"Created design at '{output}'")
def args_parser(args_in):
"""Parses arguments from the command line tool"""
parser = argparse.ArgumentParser(
prog='candidates_to_design',
formatter_class=argparse.RawDescriptionHelpFormatter,
description=textwrap.dedent('''\
Script for generating a design json from a set of evolution's candidates.
Example usage:
Download output data from evoultion result "Teselagen Example Evolutive Model".
Store this file in this same folder with the name "evolve_exported_results.csv".
The next line will create a file with the default name "EVOLVE_candidates.json"
with a design containing valid candidates.
>>> python3 candidates_to_design.py --bin_cols "Teselagen Enzyme A" "Teselagen Enzyme B" --input "evolve_exported_results.csv"
'''))
parser.add_argument('--input',
default="input.csv",
#metavar='-i',
type=str,
help='Path to csv input file',
)
parser.add_argument('--bin_cols',
#metavar='-i',
required=True,
type=str,
nargs='+',
help='List containing the names of the bins to be read from data. These names should be column names at the input',
)
parser.add_argument('--name',
default="EVOLVE_candidates",
#metavar='-i',
type=str,
help='Path to csv input file',
)
parser.add_argument('--output',
default="EVOLVE_candidates.json",
#metavar='-i',
type=str,
help='Path to json output file',
)
return parser.parse_args(args_in)
# Command line tool, parses arguments and call candidates_to_design
if __name__ == "__main__":
args = args_parser(sys.argv[1:])
print(args)
candidates_to_design(
input_file=args.input,
bin_cols=args.bin_cols,
name=args.name,
output=args.output
)
print("Done!")