Skip to content

Commit

Permalink
Add Parallel File Parsing and Improve testing (#31)
Browse files Browse the repository at this point in the history
* Add parallel file parsing (can be turned off with OPPPY_USE_THREADS=False)

* try/except multiprocessing availability

* add multiprocessing to the dependencies

* temporarily remove python 3.11 testing

* add log plotting for 2d contours

* fix testing to actually... test

* fix interactive testing and improve coverage for -ls options in 2d contours

* add hdf h5py package to dependencies

* fix math syntax

* more math syntax fixes

* add serial file parsing tests

* update tests to use gold standards

---------

Co-authored-by: Cleveland <[email protected]>
  • Loading branch information
clevelam and Cleveland authored Jan 9, 2024
1 parent 790cc69 commit e852606
Show file tree
Hide file tree
Showing 29 changed files with 625 additions and 124 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/testing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install --user pytest-cov numpy matplotlib scipy argparse
pip install --user pytest-cov numpy matplotlib scipy argparse h5py
## - name: flake
## run: |
## # stop the build if there are Python syntax errors or undefined names
Expand Down
70 changes: 56 additions & 14 deletions opppy/dump_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,16 @@
'''

from numpy import *
import os
import sys
import pickle
import math
from multiprocessing import Process, Manager

from opppy.progress import progress

USE_THREADS = os.getenv("OPPPY_USE_THREADS", 'True').lower() in ('true', '1', 't')

def point_value_1d(data, x_key, value_key, x_value, method='nearest'):
'''
Grid data function. This function takes a 1D data structure from dictionary
Expand Down Expand Up @@ -114,7 +119,7 @@ def point_value_3d(data, x_key, y_key, z_key, value_key, x_value, y_value, z_val



def data2grid(data, x_key, y_key, value_key, npts=500, method='nearest'):
def data2grid(data, x_key, y_key, value_key, npts=500, method='nearest', log_scale=False):
'''
This function takes a 2D data structure from dictionary and creates a 2D
grid for each array by interpolating. This is useful for plotting.
Expand All @@ -138,14 +143,18 @@ def data2grid(data, x_key, y_key, value_key, npts=500, method='nearest'):
grid_data = {}
value = data[value_key]
grid_data[value_key] = griddata((X, Y), value, (xi, yi), method).T
if(log_scale):
grid_data[value_key] = [[0.0 if val<=0.0 else math.log10(val) for val in vals] for vals in
grid_data[value_key]]
grid_data[x_key] = xi
grid_data[y_key] = yi

return grid_data



def data2gridbox(data, x_key, y_key, value_key, xmin, ymin, xmax, ymax,npts=500, method='nearest'):
def data2gridbox(data, x_key, y_key, value_key, xmin, ymin, xmax, ymax,npts=500, method='nearest',
log_scale=False):
'''
This function takes a 2D data structure from a data dictionary and creates
a 2D grid for each array by interpolating in a user defined region.
Expand Down Expand Up @@ -174,14 +183,18 @@ def data2gridbox(data, x_key, y_key, value_key, xmin, ymin, xmax, ymax,npts=500,
grid_data = {}
value = data[value_key]
grid_data[value_key] = griddata((X, Y), value, (xi, yi), method).T
if(log_scale):
grid_data[value_key] = [[0.0 if val<=0.0 else math.log10(val) for val in vals] for vals in
grid_data[value_key]]
grid_data[x_key] = xi
grid_data[y_key] = yi

return grid_data



def data2grid3Dslice(data, x_key, y_key, z_key, value_key, z_slice_value, npts=500,method='nearest'):
def data2grid3Dslice(data, x_key, y_key, z_key, value_key, z_slice_value, npts=500,method='nearest',
log_scale=False):
'''
This function takes a 3D data structure from a data dictionary and creates
a 2D grid for each array by interpolating. This is useful for plotting.
Expand All @@ -206,6 +219,9 @@ def data2grid3Dslice(data, x_key, y_key, z_key, value_key, z_slice_value, npts=5
grid_data = {}
V = data[value_key]
grid_data[value_key] = griddata((X, Y, Z), V, (xi, yi, zi), method).T[0]
if(log_scale):
grid_data[value_key] = [[0.0 if val<=0.0 else math.log10(val) for val in vals] for vals in
grid_data[value_key]]
grid_data[x_key] = xi.T[0]
grid_data[y_key] = yi.T[0]

Expand Down Expand Up @@ -399,7 +415,8 @@ def extract_series_line(data_list,series_key,value_key,dim_keys,point0_values,po

return t, grid

def extract_series_2d(data_list, series_key, value_key, dim_keys, npts=500, method='nearest', box=[]):
def extract_series_2d(data_list, series_key, value_key, dim_keys, npts=500, method='nearest',
log_scale=False, box=[]):
'''
This function extracts the data values along a specified line from a
series of data dictionaries.
Expand All @@ -423,16 +440,18 @@ def extract_series_2d(data_list, series_key, value_key, dim_keys, npts=500, meth
for data in data_list:
T.append(data[series_key])
if len(box) == 0:
grid.append(data2grid(data, dim_keys[0], dim_keys[1], value_key, npts, method))
grid.append(data2grid(data, dim_keys[0], dim_keys[1], value_key, npts, method, log_scale))
else:
grid.append(data2gridbox(data, dim_keys[0], dim_keys[1], value_key, box[0], box[1], box[2], box[3],npts,method))
grid.append(data2gridbox(data, dim_keys[0], dim_keys[1], value_key, box[0], box[1],
box[2], box[3],npts,method, log_scale))

t = {}
t[series_key] = array(T)

return t, grid

def extract_series_2d_slice(data_list,series_key,value_key,dim_keys, slice_value, npts=500, method='nearest'):
def extract_series_2d_slice(data_list,series_key,value_key,dim_keys, slice_value, npts=500,
method='nearest', log_scale=False):
'''
This function extracts the data values along a specified line from a
series of data dictionaries.
Expand All @@ -459,7 +478,8 @@ def extract_series_2d_slice(data_list,series_key,value_key,dim_keys, slice_value
print("Error: series_key dictionary item must return a single value (i.e. cycle or time)")
sys.exit(0)

grid.append(data2grid3Dslice(data, dim_keys[0], dim_keys[1], dim_keys[2],value_key, slice_value, npts, method))
grid.append(data2grid3Dslice(data, dim_keys[0], dim_keys[1], dim_keys[2],value_key,
slice_value, npts, method, log_scale))

t = {}
t[series_key] = array(T)
Expand All @@ -468,7 +488,10 @@ def extract_series_2d_slice(data_list,series_key,value_key,dim_keys, slice_value

def append_dumps(data, dump_files, opppy_parser, key_words=None):
'''
Append output data from a list of output_files to a user provided dictionary using a user proved opppy_parser
Append output data from a list of output_files to a user provided dictionary using a user proved
opppy_parser. By default this function will use the multiprocessing option to parallelize the
parsing of multiple dumps. The parallel parsing can be disabled by setting
the environment variable 'OPPPY_USE_THREADS=False'
Input options:
data opppy input dictionary to be append to (must have a 'verion' opppy key)
Expand All @@ -477,13 +500,32 @@ def append_dumps(data, dump_files, opppy_parser, key_words=None):
append_date bool to specify if the data should be appended to the file
name for tracking purposes
'''

total = len(dump_files)
count = 0
for dump in dump_files:
# append new dictionary data to the pickle file
data[dump.split('/')[-1]] = opppy_parser.build_data_dictionary(dump,key_words)
count += 1
progress(count,total, 'of dump files read')
print('')
print("Number of files to be read: ", total)
if(USE_THREADS):
def thread_all(file_name, key_words, result_d):
result_d[file_name.split('/')[-1]] = opppy_parser.build_data_dictionary(file_name,key_words)
with Manager() as manager:
result_d = manager.dict()
threads = []
for file_name in dump_files:
thread = Process(target=thread_all, args=(file_name, key_words, result_d,))
thread.start()
threads.append(thread)
for thread in threads:
thread.join()
count += 1
progress(count,total, 'of input files read')
data.update(result_d)
else:
for dump in dump_files:
# append new dictionary data to the pickle file
data[dump.split('/')[-1]] = opppy_parser.build_data_dictionary(dump,key_words)
count += 1
progress(count,total, 'of dump files read')

print('')
print('')
Expand Down
24 changes: 18 additions & 6 deletions opppy/interactive_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -807,11 +807,15 @@ def plot_series_contour(self, args):
if len(args.dimension_keys) != 3:
print('Error: z_slice_location specified so length of dimension_keys must be 3')
sys.exit(0)
tracer_t, tracer_grid = extract_series_2d_slice(dictionary_list, args.series_key, args.data_name, args.dimension_keys, args.z_slice_location, args.number_of_points, args.interpolation_method)
tracer_t, tracer_grid = extract_series_2d_slice(dictionary_list, args.series_key,
args.data_name, args.dimension_keys, args.z_slice_location,
args.number_of_points, args.interpolation_method, args.log_scale)
else:
if len(args.dimension_keys) != 2:
print('Error: z_slice_location specified is not specified so length of dimension_keys must be 2')
tracer_t, tracer_grid = extract_series_2d(dictionary_list, args.series_key, args.data_name, args.dimension_keys, args.number_of_points, args.interpolation_method)
tracer_t, tracer_grid = extract_series_2d(dictionary_list, args.series_key,
args.data_name, args.dimension_keys, args.number_of_points,
args.interpolation_method, args.log_scale)
series_data = series_pair(tracer_t, tracer_grid)
elif args.pickle_file is not None:
dictionary = pickle.load(open(args.pickle_file,'rb'))
Expand All @@ -825,11 +829,15 @@ def plot_series_contour(self, args):
if len(args.dimension_keys) != 3:
print('Error: z_slice_location specified so length of dimension_keys must be 3')
sys.exit(0)
tracer_t, tracer_grid = extract_series_2d_slice(dictionary_list, args.series_key, args.data_name, args.dimension_keys, args.z_slice_location, args.number_of_points, args.interpolation_method)
tracer_t, tracer_grid = extract_series_2d_slice(dictionary_list, args.series_key,
args.data_name, args.dimension_keys, args.z_slice_location,
args.number_of_points, args.interpolation_method, args.log_scale)
else:
if len(args.dimension_keys) != 2:
print('Error: z_slice_location specified is not specified so length of dimension_keys must be 2')
tracer_t, tracer_grid = extract_series_2d(dictionary_list, args.series_key, args.data_name, args.dimension_keys, args.number_of_points, args.interpolation_method)
tracer_t, tracer_grid = extract_series_2d(dictionary_list, args.series_key,
args.data_name, args.dimension_keys, args.number_of_points,
args.interpolation_method, args.log_scale)
series_data = series_pair(tracer_t, tracer_grid)
if args.case_file is not None:
dictionary_list = build_case_data_list(args.case_file, None, self.dump_parser, args.key_words)
Expand All @@ -839,11 +847,15 @@ def plot_series_contour(self, args):
if len(args.dimension_keys) != 3:
print('Error: z_slice_location specified so length of dimension_keys must be 3')
sys.exit(0)
tracer_t, tracer_grid = extract_series_2d_slice(dictionary_list, args.series_key, args.data_name, args.dimension_keys, args.z_slice_location, args.number_of_points, args.interpolation_method)
tracer_t, tracer_grid = extract_series_2d_slice(dictionary_list, args.series_key,
args.data_name, args.dimension_keys, args.z_slice_location,
args.number_of_points, args.interpolation_method, args.log_scale)
else:
if len(args.dimension_keys) != 2:
print('Error: z_slice_location specified is not specified so length of dimension_keys must be 2')
tracer_t, tracer_grid = extract_series_2d(dictionary_list, args.series_key, args.data_name, args.dimension_keys, args.number_of_points, args.interpolation_method)
tracer_t, tracer_grid = extract_series_2d(dictionary_list, args.series_key,
args.data_name, args.dimension_keys, args.number_of_points,
args.interpolation_method, args.log_scale)
series_data = series_pair(tracer_t, tracer_grid)

args.x_value_name= args.dimension_keys[0]
Expand Down
63 changes: 48 additions & 15 deletions opppy/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,14 @@
import sys
import pickle
import io
import os
import numpy as np
from multiprocessing import Process, Manager

from opppy.version import __version__
from opppy.progress import *

USE_THREADS = os.getenv("OPPPY_USE_THREADS", 'True').lower() in ('true', '1', 't')

def append_cycle_data(cycle_data, data, sort_key_string):
'''
Expand Down Expand Up @@ -312,7 +315,11 @@ def extract_cycle_data(cycle_string, my_opppy_parser):

def append_output_dictionary(data, output_files, opppy_parser, append_date=False):
'''
Append output data from a list of output_files to a user provided dictionary using a user proved opppy_parser
Append output data from a list of output_files to a user provided dictionary using a user proved
opppy_parser. By default this function will use the multiprocessing option to parallelize the
parsing of multiple dumps. The parallel parsing can be disabled by setting
the environment variable 'OPPPY_USE_THREADS=False'
arguments:
data opppy input dictionary to be append to (must have a 'verion' opppy key)
Expand All @@ -330,33 +337,59 @@ def append_output_dictionary(data, output_files, opppy_parser, append_date=False
print("This data dictionary has no version")
print("This version of OPPPY is ", __version__)
sys.exit(0)

time = ''
if append_date:
time = time+'.'+datetime.datetime.now().strftime ("%Y%m%d%H%M%S")
count = 0
total = len(output_files)
print('')
print("Number of files to be read: ", total)
cycle_string_list=[]
data_list = []
if(USE_THREADS):
def thread_all(file_name, result_d):
thread_cycle_string_list = get_output_lines(file_name, opppy_parser.cycle_opening_string,
opppy_parser.cycle_closing_string, opppy_parser.file_end_string);
thread_data = []
for cycle_string in thread_cycle_string_list:
thread_data.append(extract_cycle_data(cycle_string, opppy_parser))
result_d[file_name]=thread_data
with Manager() as manager:
result_d = manager.dict()
threads = []
for file_name in output_files:
thread = Process(target=thread_all, args=(file_name, result_d,))
thread.start()
threads.append(thread)
for thread in threads:
thread.join()
count += 1
progress(count,total, 'of input files read')
for file_name in output_files:
data_list += result_d[file_name]
else:
cycle_string_list=[]
for file_name in output_files:
cycle_string_list+=get_output_lines(file_name, opppy_parser.cycle_opening_string, opppy_parser.cycle_closing_string, opppy_parser.file_end_string)
count += 1
progress(count,total, 'of input files read')

count = 0
total = len(cycle_string_list)
print('')
print("Number of cycles to be parsed: ", total)
for cycle_string in cycle_string_list:
data_list.append(extract_cycle_data(cycle_string, opppy_parser))
count += 1
progress(count,total, 'of cycles parsed')
print('')

for file_name in output_files:
cycle_string_list+=get_output_lines(file_name, opppy_parser.cycle_opening_string, opppy_parser.cycle_closing_string, opppy_parser.file_end_string)
if 'appended_files' in data:
data['appended_files'].append(file_name.split('/')[-1]+time)
else:
data['appended_files'] = [file_name.split('/')[-1]+time]
count += 1
progress(count,total, 'of input files read')

total = len(cycle_string_list)
count = 0
print('')
print("Number of cycles to be parsed: ", total)
for cycle_string in cycle_string_list:
cycle_data = extract_cycle_data(cycle_string, opppy_parser)
for cycle_data in data_list:
data = append_cycle_data(cycle_data,data,opppy_parser.sort_key_string)
count += 1
progress(count,total, 'of cycles parsed')

print('')
print('')
Expand Down
2 changes: 2 additions & 0 deletions opppy/plot_dictionary.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from math import *
import argparse
import shlex
import warnings

from opppy.plotting_help import *

Expand Down Expand Up @@ -264,6 +265,7 @@ def plot_dict(self, args, dictionaries, data_names):
fig = PyPloter.savefig(args.figure_name, dpi=args.figure_resolution)
print("Plot save as -- "+args.figure_name)
elif(not args.hide_plot):
warnings.filterwarnings("ignore")
PyPloter.show()


Loading

0 comments on commit e852606

Please sign in to comment.