Skip to content

Commit

Permalink
oldhepdata parser: reformat correlation/covariance matrices
Browse files Browse the repository at this point in the history
* Transform a square matrix into a format with two independent variables and one dependent variable.
* This transformation is already done in the YAML format exported from the old HepData site.

Signed-off-by: Graeme Watt <[email protected]>
  • Loading branch information
GraemeWatt committed Aug 9, 2016
1 parent b2ae5a3 commit f048245
Showing 1 changed file with 51 additions and 3 deletions.
54 changes: 51 additions & 3 deletions hepdata_converter/parsers/oldhepdata_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,54 @@ def _parse_table_data(self, data):
energy = energy_min
self._parse_energies(energy)

if any(word in self.current_table.description.lower() for word in ['covariance', 'correlation', 'matrix']):
reformatted = self._reformat_matrix()

def _reformat_matrix(self):
"""Transform a square matrix into a format with two independent variables and one dependent variable.
"""
nxax = len(self.current_table.data['independent_variables'])
nyax = len(self.current_table.data['dependent_variables'])
npts = len(self.current_table.data['independent_variables'][0]['values'])

# check if 1 x-axis, and npts (>=2) equals number of y-axes
if nxax != 1 or nyax != npts or npts < 2:
return False

# add second independent variable with each value duplicated npts times
xheader = self.current_table.data['independent_variables'][0]['header']
self.current_table.data['independent_variables'].append({'header': xheader, 'values': []})
for value in self.current_table.data['independent_variables'][0]['values']:
self.current_table.data['independent_variables'][1]['values'].extend(npts * [value])

# duplicate values of first independent variable npts times
self.current_table.data['independent_variables'][0]['values'] \
= npts * self.current_table.data['independent_variables'][0]['values']

# suppress header if different for second y-axis
if self.current_table.data['dependent_variables'][0]['header'] != \
self.current_table.data['dependent_variables'][1]['header']:
self.current_table.data['dependent_variables'][0]['header'] = {'name': ''}

# remove qualifier if different for second y-axis
iqdel = [] # list of qualifier indices to be deleted
for iq, qualifier in enumerate(self.current_table.data['dependent_variables'][0]['qualifiers']):
if qualifier != self.current_table.data['dependent_variables'][1]['qualifiers'][iq]:
iqdel.append(iq)
for iq in iqdel[::-1]: # need to delete in reverse order
del self.current_table.data['dependent_variables'][0]['qualifiers'][iq]

# append values of second and subsequent y-axes to first dependent variable
for iy in range(1, nyax):
for value in self.current_table.data['dependent_variables'][iy]['values']:
self.current_table.data['dependent_variables'][0]['values'].append(value)

# finally, delete the second and subsequent y-axes in reverse order
for iy in range(nyax-1, 0, -1):
del self.current_table.data['dependent_variables'][iy]

return True

def _parse_dserror(self, data):
"""Parse dserror attribute of the old HEPData format
Expand Down Expand Up @@ -447,7 +495,7 @@ def _parse_qual(self, data):
headers = data.split(':')
name = headers[0].strip()

name = name.split(' IN ')
name = re.split(' IN ', name, flags=re.I) # ignore case
units = None
if len(name) > 1:
units = name[1].strip()
Expand All @@ -463,7 +511,7 @@ def _parse_qual(self, data):

# extract energy if SQRT(S) is one of the qualifiers
if name.startswith('SQRT(S)') and lower(units) in ('gev'):
energies = xheader['value'].split(' TO ')
energies = re.split(' TO ', xheader['value'], flags=re.I)
for energy in energies:
try:
energy = float(energy)
Expand All @@ -486,7 +534,7 @@ def _parse_header(self, data):
headers = data.split(':')

for header in headers:
header = header.split(' IN ')
header = re.split(' IN ', header, flags=re.I) # ignore case
xheader = {'name': header[0].strip()}
if len(header) > 1:
xheader['units'] = header[1].strip()
Expand Down

0 comments on commit f048245

Please sign in to comment.