-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path500_calculate_sankey_pathogen_table.py
executable file
·61 lines (49 loc) · 1.88 KB
/
500_calculate_sankey_pathogen_table.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import csv
from collections import defaultdict
import argparse
from tqdm import tqdm
def process_csv(file_path):
    """
    Count parent -> child links between adjacent columns of a CSV file.

    The first row is treated as a header and skipped. For every remaining
    row, each pair of neighbouring cells (column i, column i + 1) is counted
    as one occurrence of the link ``row[i] -> row[i + 1]``.

    Args:
        file_path (str): Path to the input CSV file

    Returns:
        defaultdict: Nested mapping ``hierarchy[parent][child] -> count``
    """
    hierarchy = defaultdict(lambda: defaultdict(int))
    # newline='' lets the csv module do its own line-ending handling, which is
    # required for quoted fields that contain embedded newlines.
    with open(file_path, 'r', newline='') as csvfile:
        reader = csv.reader(csvfile)
        # Skip the header row; the default keeps a completely empty file from
        # raising StopIteration.
        next(reader, None)
        for row in tqdm(reader, desc="Processing rows"):
            # Pair every cell with its right-hand neighbour; rows with fewer
            # than two cells contribute nothing.
            for parent, child in zip(row, row[1:]):
                hierarchy[parent][child] += 1
    return hierarchy
def print_hierarchy(hierarchy, output_file=None, min_count=20):
    """
    Print or write the hierarchy links whose count reaches a threshold.

    Each retained link is rendered as ``"<parent> [<count>] <child>"``, one
    per line, in the iteration order of the mapping.

    Args:
        hierarchy (dict): Mapping of parent -> {child: count}
        output_file (str, optional): Path to the output file. If None (or
            empty), print to console.
        min_count (int, optional): Smallest count a link needs in order to be
            included. Defaults to 20, the threshold this script has always
            applied.
    """
    lines = [
        f"{parent} [{count}] {child}"
        for parent, children in hierarchy.items()
        for child, count in children.items()
        if count >= min_count
    ]
    text = '\n'.join(lines)
    if output_file:
        with open(output_file, 'w') as f:
            f.write(text)
        print(f"Results written to {output_file}")
    else:
        print(text)
def main():
    """Parse the command-line options, build the hierarchy and emit it."""
    cli = argparse.ArgumentParser(
        description="Process CSV file to create a hierarchy structure."
    )
    cli.add_argument("-i", "--input", required=True, help="Input CSV file path")
    cli.add_argument("-o", "--output", help="Output file path (optional)")
    options = cli.parse_args()

    # With no --output given, options.output is None and the result goes to
    # the console.
    print_hierarchy(process_csv(options.input), options.output)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()