-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathMappingOPUEvtId.py
100 lines (75 loc) · 3.25 KB
/
MappingOPUEvtId.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#!/usr/bin/env python
# coding: utf-8
# This code takes as an argument the files generated by TransformArraySparseFloat.py and outputs their projections
# using the appropriate quartile file. The output directory needs to be changed manually if needed.
import argparse
import os

import numpy as np
import pandas as pd
import sparse
# scipy.ndimage.filters was deprecated (SciPy 1.1) and removed in SciPy 1.15;
# maximum_filter lives directly in scipy.ndimage.
from scipy.ndimage import maximum_filter
from tqdm import tqdm

from lightonml.projections.sklearn import OPUMap
def file2arr(filestub: str):
    '''
    Transform the files of the 'filestub'-th generated process into a
    set of canvas, labels and event IDs.

    Loads the Labels, EventID and the 11 per-sub-detector sparse arrays
    written by TransformArraySparseFloat.py, thresholds and max-filters
    each sub-detector image using the module-level ``quartile_filter``
    DataFrame (read from CSV below), and pastes the results onto one
    boolean canvas per event.

    Parameters
    ----------
    filestub: str,
        the number characterizing the process considered, as it appears
        in the file names on disk.
        NOTE(review): an earlier version of this docstring said it must
        be zero-padded to length 3, but the caller below passes the bare
        number unpadded -- confirm which naming scheme the files use.

    Returns
    ----------
    canvas: np.ndarray,
        boolean array of shape (nevents, 900, 1115) to be mapped to the DMD
    labels: np.ndarray
    eventID: np.ndarray
    '''
    f = {}
    # Input directory/prefix of the sparse files produced upstream.
    namedir = '/refactor_data/StageM1VS/SortieTASF_bdt/fmixed_data_'
    # The 11 sub-detector regions; each has its own sparse file per stub.
    filenames = ['ECAL_ForwardN', 'ECAL_EBEE', 'ECAL_ForwardP',
                 'ECAL_Gamma_ForwardN', 'ECAL_Gamma_EBEE', 'ECAL_Gamma_ForwardP',
                 'HCAL_ForwardN', 'HCAL_HEN', 'HCAL_HB', 'HCAL_HEP', 'HCAL_ForwardP']
    labels = np.load(namedir + filestub + ".h5Labels.npz", allow_pickle=False)['data']
    eventID = np.load(namedir + filestub + ".h5EventID.npz", allow_pickle=False)['data']
    nevents = labels.shape[0]
    for i, fn in enumerate(filenames):
        f[fn] = sparse.load_npz(namedir + filestub + '.h5' + fn + ".npz")
    # One 900x1115 boolean canvas per event (the DMD frame).
    canvas = np.zeros([nevents, 900, 1115], dtype=bool)
    for iimg in range(nevents):
        # quartile_filter (module-level global, loaded below): one row per
        # sub-detector region, indexed by region name; columns used here are
        # qval (threshold), "filter" (max-filter size), t (transpose flag),
        # x, y (placement) and idq (row/column slot within the canvas).
        for ilab, irow in quartile_filter.iterrows():
            img = f[ilab][iimg].todense()
            # Binarize against the quartile threshold, then dilate with a
            # maximum filter of the configured size.
            fimg = maximum_filter(img > irow.qval, irow["filter"])
            if irow.t: fimg = fimg.T
            y = int(irow.y)
            x = int(irow.x)
            # Regions placed at x == 850 are stacked in 186-pixel slots;
            # all others in 372-pixel slots. TODO(review): confirm these
            # slot heights against the DMD layout used upstream.
            if irow.x == 850:
                y += int(186 * irow.idq)
            else:
                y += int(372 * irow.idq)
            # Paste the filtered patch at its (x, y) position on the canvas.
            canvas[iimg, x:x + fimg.shape[0], y:y + fimg.shape[1]] = fimg
    return canvas, labels, eventID
# Output directory, change if needed
outdir = "/refactor_data/StageM1VS/SortieOPU_bdt/"
print("Output directory:", outdir)

ncomp = 200000  # number of random features produced by the OPU
typ = 'ttbar'   # process type: 'ttbar' or 'W'

# Per-region thresholds and placement parameters consumed by file2arr().
quartile_filter = pd.read_csv('/refactor_data/StageM1VS/Quartiles/quartile_filters_' + typ + '.csv',
                              index_col=0)

random_mapping = OPUMap(n_components=ncomp,
                        ndims=2
                        )

# Each file stub corresponds to 13 files on disk: the 11 sub-detector
# arrays plus the Labels and EventID files (see file2arr), hence // 13.
nstubs = len(os.listdir('/refactor_data/StageM1VS/SortieTASF_bdt')) // 13

with random_mapping.opu:
    for i in tqdm(range(nstubs)):
        # The original `enumerate(tqdm(range(...)))` carried two counters
        # that were always equal; a single loop index suffices.
        filestub = f"{i}"
        arr, labels, eventID = file2arr(filestub)
        # Fit the mapping on the first batch only; later batches reuse it
        # so all files are projected with the same random features.
        if i == 0:
            OPUoutput = random_mapping.fit_transform(arr)
        else:
            OPUoutput = random_mapping.transform(arr)
        np.savez_compressed(outdir + f"{filestub}.npz",
                            OPU=OPUoutput,
                            labels=labels, eventID=eventID)
# In[ ]: