In [1]:
import h5py
from sklearn.model_selection import train_test_split
import numpy as np

import os
# Step up to the parent directory; the relative 'signals/...' paths used in later cells are resolved from there.
# NOTE: the target is relative to the current directory, so re-running this cell moves up one more level each time.
os.chdir('..')

In [2]:
def normalize_features(particles, bmin, bmax, emin, emax):
    """Build the per-particle feature array from raw particle records.

    The output keeps the first two dimensions of ``particles`` and has four
    channels:

    * channel 0 -- pT divided by the summed pT of its event (sum over axis 1),
    * channel 1 -- eta, copied unchanged,
    * channel 2 -- phi, copied unchanged,
    * channel 3 -- class label, copied unchanged.

    Events whose summed pT is exactly zero (for instance fully zero-padded
    events) get 0 in channel 0 rather than the NaN/inf that a plain division
    would produce.

    Parameters
    ----------
    particles : np.ndarray
        Array indexed as ``[event, particle, feature]`` with at least four
        features ordered (pT, eta, phi, class).
    bmin, bmax, emin, emax
        Currently unused by the normalization; accepted so that existing
        callers keep working.

    Returns
    -------
    np.ndarray
        Float array of shape ``(particles.shape[0], particles.shape[1], 4)``.
    """
    idx_pt, idx_eta, idx_phi, idx_class = range(4)

    pt = particles[:, :, idx_pt]
    pt_total = pt.sum(axis=1, keepdims=True)

    particles_n = np.ones([particles.shape[0], particles.shape[1], 4])

    # Divide only where the event total is non-zero; all other entries keep
    # the 0 they were initialised with in `out`.
    particles_n[:, :, idx_pt] = np.divide(
        pt, pt_total, out=np.zeros(pt.shape, dtype=float), where=pt_total != 0
    )
    particles_n[:, :, idx_eta] = particles[:, :, idx_eta]
    particles_n[:, :, idx_phi] = particles[:, :, idx_phi]
    particles_n[:, :, idx_class] = particles[:, :, idx_class]

    return particles_n

In [3]:
def create_datasets_convolutional(bkg_file, output_bkg_name, signals_files, output_signal_names, events=None,
                                  test_size=0.4, val_size=0.1, input_shape=57, random_state=None):
    """Create the train/test/validation background datasets and the signal datasets.

    Background: the ``'Particles'`` dataset of ``bkg_file`` is read, its first
    event is printed, the events are shuffled and (optionally) truncated to
    ``events`` entries. The data is passed through ``normalize_features``, the
    last (class) channel is dropped and a trailing axis of length 1 is added.
    The result is split into train/test/validation parts that are written to
    ``output_bkg_name + '_dataset.h5'`` as ``X_train``, ``X_test`` and ``X_val``.

    Signals: for each entry of ``signals_files`` the ``'Particles'`` dataset is
    processed the same way and written as ``'Data'`` to
    ``output_signal_names[i] + '_dataset.h5'``.

    Parameters
    ----------
    bkg_file : str
        Path of the background HDF5 file.
    output_bkg_name : str
        Output path prefix for the background dataset file.
    signals_files : sequence of str or None
        Paths of the signal HDF5 files; nothing is done for signals if falsy.
    output_signal_names : sequence of str
        Output path prefixes, indexed in parallel with ``signals_files``.
    events : int, optional
        If truthy, only the first ``events`` shuffled background events are kept.
    test_size : float
        Passed to ``train_test_split`` for the train/test split.
    val_size : float
        Passed to ``train_test_split`` to carve the validation set out of the
        training part (so it is relative to the non-test data).
    input_shape : int
        Currently unused; kept so existing callers keep working.
    random_state : int, optional
        Seed for the shuffle and both splits. With the default ``None`` the
        global NumPy random state is used.
    """
    rng = None if random_state is None else np.random.RandomState(random_state)

    # read BACKGROUND data (fully loaded into memory, so the file can be closed right away)
    with h5py.File(bkg_file, 'r') as file:
        full_data = file['Particles'][:, :, :]
    print(full_data[0])

    (np.random if rng is None else rng).shuffle(full_data)
    if events:
        full_data = full_data[:events, :, :]

    bmin = np.min(full_data[:, :, 0])
    bmax = np.max(full_data[:, :, 0])
    # eta is column 1 (see the idx_* layout in normalize_features); column 2 is phi.
    emin = np.min(full_data[:, :, 1])
    emax = np.max(full_data[:, :, 1])

    conv_data = _to_conv_input(full_data, bmin, bmax, emin, emax)
    del full_data

    # define training, test and validation datasets
    X_train, X_test = train_test_split(conv_data, test_size=test_size, shuffle=True, random_state=rng)
    X_train, X_val = train_test_split(X_train, test_size=val_size, random_state=rng)
    del conv_data

    with h5py.File(output_bkg_name + '_dataset.h5', 'w') as h5f:
        h5f.create_dataset('X_train', data=X_train)
        h5f.create_dataset('X_test', data=X_test)
        h5f.create_dataset('X_val', data=X_val)

    if signals_files:
        # read SIGNAL data
        for i, signal_file in enumerate(signals_files):
            # Context manager so the input file handle is released after reading.
            with h5py.File(signal_file, 'r') as f:
                signal_particles = f['Particles'][:, :, :]
            signal_data = _to_conv_input(signal_particles, bmin, bmax, emin, emax)
            with h5py.File(output_signal_names[i] + '_dataset.h5', 'w') as h5f2:
                h5f2.create_dataset('Data', data=signal_data)


def _to_conv_input(particles, bmin, bmax, emin, emax):
    """Normalize ``particles``, drop the last (class) channel and append a trailing axis of length 1."""
    features = normalize_features(particles, bmin, bmax, emin, emax)[:, :, :-1]
    # Equivalent to reshaping to (-1, 19, 3, 1) when there are 19 particles per event.
    return features[..., np.newaxis]

In [4]:
if __name__ == '__main__':
    print("Start")

    # (input signal file, output path prefix) pairs, kept side by side so the
    # two parallel lists handed to the dataset builder cannot drift apart.
    signal_io = [
        ("signals/Ato4l_lepFilter_13TeV_filtered.h5", "signals/Ato4l"),
        ("signals/hChToTauNu_13TeV_PU20_filtered.h5", "signals/hChToTauNu"),
        ("signals/hToTauTau_13TeV_PU20_filtered.h5", "signals/hToTauTau"),
        ("signals/leptoquark_LOWMASS_lepFilter_13TeV_filtered.h5", "signals/LQtoBTau"),
    ]
    signals_files = [src for src, dst in signal_io]
    signal_names = [dst for src, dst in signal_io]

    create_datasets_convolutional(
        'signals/background_for_training.h5',
        'signals/convolutional',
        signals_files,
        signal_names,
    )

    print("Done")

Start
[[27.41296387  0.         -2.0499022   1.        ]
 [24.20996857  1.63350999 -0.10349621  2.        ]
 [ 0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]]
Done
