-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathMappingOPUEvtId.py
100 lines (75 loc) · 3.25 KB
/
MappingOPUEvtId.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#!/usr/bin/env python
# coding: utf-8
# This code takes as an argument the files generated by TransformArraySparseFloat.py and outputs their projections
# using the appropriate quartile file. The output directory needs to be changed manually if needed.
import argparse
import os

import numpy as np
import pandas as pd
import sparse
# scipy.ndimage.filters was deprecated (SciPy 1.1) and removed in SciPy 1.15;
# maximum_filter lives directly in scipy.ndimage.
from scipy.ndimage import maximum_filter
from tqdm import tqdm

from lightonml.projections.sklearn import OPUMap
def file2arr(filestub: str):
    '''
    Transform the files of the 'filestub'-th generated process into a
    set of canvas, labels and event IDs.

    Loads the Labels, EventID and the 11 per-sub-detector sparse arrays
    written by TransformArraySparseFloat.py, thresholds and max-filters
    each sub-detector image using the module-level ``quartile_filter``
    DataFrame (read from CSV below), and pastes the results onto one
    boolean canvas per event.

    Parameters
    ----------
    filestub: str,
        the number characterizing the process considered, as it appears
        in the file names on disk.
        NOTE(review): an earlier version of this docstring said it must
        be zero-padded to length 3, but the caller below passes the bare
        number unpadded -- confirm which naming scheme the files use.

    Returns
    ----------
    canvas: np.ndarray,
        boolean array of shape (nevents, 900, 1115) to be mapped to the DMD
    labels: np.ndarray
    eventID: np.ndarray
    '''
    f = {}
    # Input directory/prefix of the sparse files produced upstream.
    namedir = '/refactor_data/StageM1VS/SortieTASF_bdt/fmixed_data_'
    # The 11 sub-detector regions; each has its own sparse file per stub.
    filenames = ['ECAL_ForwardN', 'ECAL_EBEE', 'ECAL_ForwardP',
                 'ECAL_Gamma_ForwardN', 'ECAL_Gamma_EBEE', 'ECAL_Gamma_ForwardP',
                 'HCAL_ForwardN', 'HCAL_HEN', 'HCAL_HB', 'HCAL_HEP', 'HCAL_ForwardP']
    labels = np.load(namedir + filestub + ".h5Labels.npz", allow_pickle=False)['data']
    eventID = np.load(namedir + filestub + ".h5EventID.npz", allow_pickle=False)['data']
    nevents = labels.shape[0]
    for i, fn in enumerate(filenames):
        f[fn] = sparse.load_npz(namedir + filestub + '.h5' + fn + ".npz")
    # One 900x1115 boolean canvas per event (the DMD frame).
    canvas = np.zeros([nevents, 900, 1115], dtype=bool)
    for iimg in range(nevents):
        # quartile_filter (module-level global, loaded below): one row per
        # sub-detector region, indexed by region name; columns used here are
        # qval (threshold), "filter" (max-filter size), t (transpose flag),
        # x, y (placement) and idq (row/column slot within the canvas).
        for ilab, irow in quartile_filter.iterrows():
            img = f[ilab][iimg].todense()
            # Binarize against the quartile threshold, then dilate with a
            # maximum filter of the configured size.
            fimg = maximum_filter(img > irow.qval, irow["filter"])
            if irow.t: fimg = fimg.T
            y = int(irow.y)
            x = int(irow.x)
            # Regions placed at x == 850 are stacked in 186-pixel slots;
            # all others in 372-pixel slots. TODO(review): confirm these
            # slot heights against the DMD layout used upstream.
            if irow.x == 850:
                y += int(186 * irow.idq)
            else:
                y += int(372 * irow.idq)
            # Paste the filtered patch at its (x, y) position on the canvas.
            canvas[iimg, x:x + fimg.shape[0], y:y + fimg.shape[1]] = fimg
    return canvas, labels, eventID
# Output directory, change if needed
outdir = "/refactor_data/StageM1VS/SortieOPU_bdt/"
print("Output directory:", outdir)

ncomp = 200000  # number of random features produced by the OPU
typ = 'ttbar'   # process type: 'ttbar' or 'W'

# Per-region thresholds and placement parameters consumed by file2arr().
quartile_filter = pd.read_csv('/refactor_data/StageM1VS/Quartiles/quartile_filters_' + typ + '.csv',
                              index_col=0)

random_mapping = OPUMap(n_components=ncomp,
                        ndims=2
                        )

# Each file stub corresponds to 13 files on disk: the 11 sub-detector
# arrays plus the Labels and EventID files (see file2arr), hence // 13.
nstubs = len(os.listdir('/refactor_data/StageM1VS/SortieTASF_bdt')) // 13

with random_mapping.opu:
    for i in tqdm(range(nstubs)):
        # The original `enumerate(tqdm(range(...)))` carried two counters
        # that were always equal; a single loop index suffices.
        filestub = f"{i}"
        arr, labels, eventID = file2arr(filestub)
        # Fit the mapping on the first batch only; later batches reuse it
        # so all files are projected with the same random features.
        if i == 0:
            OPUoutput = random_mapping.fit_transform(arr)
        else:
            OPUoutput = random_mapping.transform(arr)
        np.savez_compressed(outdir + f"{filestub}.npz",
                            OPU=OPUoutput,
                            labels=labels, eventID=eventID)
# In[ ]: