-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathminify_h5.py
51 lines (42 loc) · 1.71 KB
/
minify_h5.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
"""
This script extracts the complete time series (entries whose length along the third axis matches
the length of their `times` attribute) from all HDF5 files matching the pattern *_j*.h5 found in a
given directory or any of its subdirectories, and combines the extracted data into a new HDF5 file
with the same name as the deepest directory in the path hierarchy. The output file is stored at the
top level of the specified directory.
"""
import os
import sys
import h5py
import numpy as np
def extract_full_timeseries(path_name: str):
    """
    Collect every complete time series found below `path_name` into one HDF5 file.

    Walks `path_name` recursively, opens each file matching the pattern *_j*.h5 and
    copies every top-level entry whose third axis is as long as its `times` attribute
    into `<path_name>/<deepest directory name>.h5`. The combined file is opened in
    append mode and keys that already exist in it are skipped, so the first copy
    encountered wins.

    Args:
        path_name (str): The path of the directory containing the HDF5 files.

    Returns:
        None.

    Raises:
        KeyError: If a top-level entry of a matching file has no `times` attribute.
    """
    # Name the output after the last path component only. abspath normalises
    # trailing separators and resolves '.', so nested paths such as 'a/b/' yield
    # 'a/b/b.h5' rather than 'a/b/a/b/.h5'.
    dir_name = os.path.basename(os.path.abspath(path_name))
    f_min_filepath = os.path.join(path_name, f'{dir_name}.h5')
    f_min_abspath = os.path.abspath(f_min_filepath)

    # The context manager guarantees the combined file is closed even when one
    # of the part files turns out to be unreadable or malformed.
    with h5py.File(f_min_filepath, 'a') as f_min:
        for subdir, _, files in os.walk(path_name):
            for file in files:
                # Only part files (*_j*.h5) are of interest.
                if '_j' not in file or not file.endswith('.h5'):
                    continue
                filepath = os.path.join(subdir, file)
                # The combined file lives inside the walked tree and may itself
                # match the pattern; never read from the file being written.
                if os.path.abspath(filepath) == f_min_abspath:
                    continue
                print(filepath)
                with h5py.File(filepath, 'r') as f_part:
                    for key, value in f_part.items():
                        # TODO: Store file number as attribute in combined file
                        t_len = len(value.attrs['times'])
                        # Read the shape from the metadata instead of loading
                        # the whole dataset into memory.
                        # NOTE(review): assumes every top-level entry is a
                        # dataset with at least three dimensions - confirm.
                        if t_len == value.shape[2] and key not in f_min:
                            f_part.copy(f"/{key}", f_min["/"])
if __name__ == '__main__':
    # The directory to process is the first positional argument; exit with a
    # usage hint instead of an IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit(f'Usage: {sys.argv[0]} <directory>')
    PATH_NAME = sys.argv[1]
    extract_full_timeseries(PATH_NAME)