In [1]:
import numpy as np
import matplotlib.pyplot as plt
import uproot
from onehotencoder import OneHotEncoder_int
import os
import pandas as pd

In [2]:
# Larger feature set kept for reference; the active list below uses a reduced subset.
#featurevars = ['H_mass', 'H_relpt', 'H_ptt', 'Z_cos_theta', 'lep_cos_theta', 'lep_phi', 'l1g_deltaR', 'l2g_deltaR', 'gamma_relpt', 'gamma_ptRelErr', 'Z_lead_lepton_eta', 'Z_sublead_lepton_eta',  'regions']
# Branches read from the tree by prepdata(); its `iscategorical` flags must
# line up with this list position by position.
featurevars = ['H_mass', 'H_relpt', 'Z_cos_theta', 'lep_cos_theta', 'regions']
# ROOT files passed to prepdata() for the target and the source sample.
rootfile_target='./datasets/data_run2.root'
rootfile_source='./datasets/ZGToLLG_run2.root'

def prepdata(filename):
    """Load one ROOT file and build the normalised, encoded network inputs.

    The branches listed in the module-level ``featurevars`` are read from the
    ``inclusive`` tree, events outside ``105 < H_mass < 170`` are dropped, the
    result is passed through ``OneHotEncoder_int.encode`` and every float
    feature is standardised with its own mean and standard deviation.

    Parameters
    ----------
    filename : str
        Path of the ROOT file to read.

    Returns
    -------
    inputtmp : pandas.DataFrame
        Selected events with the raw (un-encoded, un-normalised) features.
    normedinputs : numpy.ndarray
        Encoded inputs after ``(inputs - inputmeans) / inputsigma``.
    inputmeans : numpy.ndarray
        Row vector of per-column means; columns of categorical features are 0.
    inputsigma : numpy.ndarray
        Row vector of per-column standard deviations; columns of categorical
        features (and float features with zero spread) are 1.
    ncat_per_feature
        ``categories_per_feature`` as reported by the encoder; 0 marks a
        float feature.

    Raises
    ------
    ValueError
        If ``iscategorical`` and ``featurevars`` differ in length.
    """
    # One flag per entry of featurevars; only the last one ('regions') is categorical.
    iscategorical = [False, False, False, False, True]
    if len(iscategorical) != len(featurevars):
        raise ValueError(
            "iscategorical has %d entries but featurevars has %d"
            % (len(iscategorical), len(featurevars))
        )
    _onehotencoder = OneHotEncoder_int(iscategorical)

    # Read everything inside the context manager so the file handle is released.
    with uproot.open(filename) as hzg:
        arrays = hzg['inclusive'].arrays(featurevars, library="pd")
    inputtmp = pd.DataFrame(arrays)
    inputtmp = inputtmp[(inputtmp["H_mass"] > 105) & (inputtmp["H_mass"] < 170)]

    inputnumpy = inputtmp.to_numpy(dtype=np.float32)
    inputs = _onehotencoder.encode(inputnumpy)
    ncats = _onehotencoder.ncats
    ncat_per_feature = _onehotencoder.categories_per_feature

    meanslist = []
    sigmalist = []
    # NOTE(review): the stacked means/sigmas are assumed to line up with the
    # encoder output (one column per float feature, ncatfeat columns per
    # categorical feature, in feature order) -- confirm against OneHotEncoder_int.
    for ifeat, ncatfeat in zip(range(inputtmp.shape[1]), ncat_per_feature):
        if ncatfeat == 0:  # for float features, get mean and sigma
            # Index the RAW array by feature position; an encoded-column
            # offset would point at the wrong raw column as soon as a
            # categorical feature precedes a float one.
            column = inputnumpy[:, ifeat]
            mean = np.mean(column, axis=0, dtype=np.float32).reshape(1, 1)
            sigma = np.std(column, axis=0, dtype=np.float32).reshape(1, 1)
            # A constant feature would otherwise produce inf/NaN on division.
            sigma[sigma == 0] = 1.0
        else:  # categorical features do not get changed
            mean = np.zeros(shape=(1, ncatfeat), dtype=np.float32)
            sigma = np.ones(shape=(1, ncatfeat), dtype=np.float32)
        meanslist.append(mean)
        sigmalist.append(sigma)

    inputmeans = np.hstack(meanslist)
    inputsigma = np.hstack(sigmalist)

    normedinputs = (inputs - inputmeans) / inputsigma

    return inputtmp, normedinputs, inputmeans, inputsigma, ncat_per_feature

In [3]:
# Load and normalise both samples; each call returns
# (raw DataFrame, normalised encoded array, means, sigmas, categories per feature).
rawinputs_target, normedinputs_target, inputmeans_target, inputsigma_target, ncat_per_feature_target = prepdata(rootfile_target)
rawinputs_source, normedinputs_source, inputmeans_source, inputsigma_source, ncat_per_feature_source = prepdata(rootfile_source)

# Both samples must have the same encoded width to be used with one model.
assert normedinputs_source.shape[1] == normedinputs_target.shape[1], \
    "source and target samples were encoded to a different number of columns"

# Number of float features: prepdata treats a 0 entry in the encoder's
# categories-per-feature list as "float feature". Counting them avoids
# hard-coding "exactly one categorical feature".
inputdim = sum(1 for ncat in ncat_per_feature_target if ncat == 0)
# Remaining encoded columns are the categorical (conditional) inputs.
conddim = normedinputs_target.shape[1] - inputdim

In [4]:
# Show the per-column means and standard deviations returned by prepdata()
# for the target sample and then for the source sample.
print(inputmeans_target)
print(inputsigma_target)
print(inputmeans_source)
print(inputsigma_source)

[[ 1.3258777e+02  3.0680946e-01 -2.1967227e-03  1.9472409e-03
   0.0000000e+00  0.0000000e+00  0.0000000e+00  0.0000000e+00
   0.0000000e+00  0.0000000e+00]]
[[17.013218    0.22716881  0.53971106  0.56857747  1.          1.
   1.          1.          1.          1.        ]]
[[ 1.3226889e+02  2.4296939e-01 -1.3178019e-03  2.7071238e-03
   0.0000000e+00  0.0000000e+00  0.0000000e+00  0.0000000e+00
   0.0000000e+00  0.0000000e+00]]
[[17.066484    0.22968054  0.52643085  0.5831948   1.          1.
   1.          1.          1.          1.        ]]


In [None]:
# Hand-edited copy of the printed target means (first entry set to 132).
# The values were pasted from array output, which has no commas; they are
# added here so the cell is valid Python.
mean = [[132, 3.0680946e-01, -2.1967227e-03, 1.9472409e-03,
         0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
         0.0000000e+00, 0.0000000e+00]]

In [4]:
# Region code 0 is the signal selection; every other region counts as background.

# Target sample: masks first, then the normalised and raw background subsets.
issignal_target = rawinputs_target['regions'].eq(0)
isbackground_target = ~issignal_target
bkgnormed_target = normedinputs_target[isbackground_target]
bkg_target = rawinputs_target[isbackground_target]

# Source sample: same procedure.
issignal_source = rawinputs_source['regions'].eq(0)
isbackground_source = ~issignal_source
bkgnormed_source = normedinputs_source[isbackground_source]
bkg_source = rawinputs_source[isbackground_source]


In [14]:
# Display the boolean signal-selection mask (regions == 0) of the target sample.
issignal_target

0           True
1          False
2          False
4          False
5          False
           ...  
4394445    False
4394446    False
4394447    False
4394448    False
4394449    False
Name: regions, Length: 4038300, dtype: bool

In [5]:
# Per-column upper bound: source mean plus five standard deviations, flattened to 1-D.
xmax = (inputmeans_source + 5 * inputsigma_source).reshape(inputmeans_source.shape[1])

# Dump the normalisation constants, the raw source frame, the first
# normalised source row and the bounds, one item per print line.
print(
    inputmeans_target,
    inputsigma_target,
    inputmeans_source,
    inputsigma_source,
    rawinputs_source,
    normedinputs_source[0],
    xmax,
    sep="\n",
)


[[134.65382    0.        17.259167   0.         0.         0.
    0.         0.         0.         0.         0.         0.
    0.         0.         0.         0.         0.         0.      ]]
[[19.731428  1.       18.837383  1.        1.        1.        1.
   1.        1.        1.        1.        1.        1.        1.
   1.        1.        1.        1.      ]]
[[134.28706    0.        16.722246   0.         0.         0.
    0.         0.         0.         0.         0.         0.
    0.         0.         0.         0.         0.         0.      ]]
[[19.81285   1.       18.001463  1.        1.        1.        1.
   1.        1.        1.        1.        1.        1.        1.
   1.        1.        1.        1.      ]]
             H_mass   H_relpt       H_ptt  Z_cos_theta  lep_cos_theta  \
0        121.306305  0.167297   20.289296    -0.206558      -0.920829   
1        165.168625  0.629479   12.799655     0.458501       0.450338   
2        111.513573  0.114854    0.308832

In [5]:
from ABCD_dnn_mmd import ABCDdnn

# Hyper-parameters handed to the ABCDdnn constructor below.
LRrange3 = [1e-5, 1e-5, 20000, 0]
nafdim, depth = 128, 2
batchsize, seed = 2000, 101

# retrain=False is passed through unchanged.
# NOTE(review): the captured output of this cell ends in an AssertionError about
# a missing checkpoint -- presumably no saved model existed in savedir; confirm.
m = ABCDdnn(
    ncat_per_feature_target,
    inputdim,
    minibatch=batchsize,
    conddim=conddim,
    LRrange=LRrange3,
    beta1=0.9,
    beta2=0.999,
    nafdim=nafdim,
    depth=depth,
    savedir='abcdnn_HZG',
    permute=True,
    retrain=False,
    seed=seed,
)

2024-04-09 11:39:37.525649: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2024-04-09 11:39:37.528158: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (lxslc711.ihep.ac.cn): /proc/driver/nvidia/version does not exist
2024-04-09 11:39:37.616658: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 10)]         0                                            
__________________________________________________________________________________________________
tf.__operators__.getitem_1 (Sli (None, 4)            0           input_2[0][0]                    
__________________________________________________________________________________________________
tf.convert_to_tensor (TFOpLambd (None, 4)            0           tf.__operators__.getitem_1[0][0] 
__________________________________________________________________________________________________
tf.compat.v1.gather (TFOpLambda (None, 4)            0           tf.convert_to_tensor[0][0]       
______________________________________________________________________________________________

AssertionError: No checkpoint specified (save_path=None); nothing is being restored.

In [7]:
# Hand the background-only (regions != 0) normalised target and source arrays to the model.
m.setrealdata(bkgnormed_target, bkgnormed_source)
# Store the model's hyper-parameters (destination is decided inside ABCDdnn).
m.savehyperparameters()
# presumably the monitoring interval in training steps -- TODO confirm in ABCDdnn
m.monitorevery = 100

In [8]:
# One-hot condition vectors of width 6 for CR1..CR5: entry k is set for "CRk",
# position 0 is never set here.
condlist = {
    f"CR{region}": [[1. if slot == region else 0. for slot in range(6)]]
    for region in range(1, 6)
}

# Run the training and show the monitoring output.
steps = 300
m.train(steps, condlist)
m.display_training()


2024-04-09 11:34:36.072795: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


: 

: 

: 

In [None]:
ncol = 4  # for plots below

# Region labels in region-code order: code 0 -> 'SR', 1 -> 'CR1', 2 -> 'CR2', 3 -> 'CR3'.
plottextlist = ['SR', 'CR1', 'CR2', 'CR3']

# One-hot vector of width 4 per region label.
condlist = {
    label: [[1. if slot == code else 0. for slot in range(4)]]
    for code, label in enumerate(plottextlist)
}

# Boolean row masks selecting each region in the raw target / source frames.
select_data_target = {
    label: (rawinputs_target['regions'] == code)
    for code, label in enumerate(plottextlist)
}

select_data_source = {
    label: (rawinputs_source['regions'] == code)
    for code, label in enumerate(plottextlist)
}

# Filled later with the model output per region.
select_data_source_transfered = {}



In [None]:

#xin = normedinputs_source[select_data_source["CR2"]][0:1]
#print(cond_to_append, xin, condlist["CR2"])
#print(xin)
#m.model.predict(xin)

minibatch = batchsize

for r in ['CR2']:
    select_data_source_transfered[r] = []

    nevents = normedinputs_source.shape[0]
    if nevents == 0:
        raise ValueError("normedinputs_source is empty; nothing to transfer")

    # NOTE(review): the full source sample is pushed through the model for
    # every r; a region-restricted variant (select_data_source[r]) had been
    # commented out here -- confirm which one is intended.
    #
    # Stepping by `minibatch` yields every full batch plus the final partial
    # one, and never an empty batch when nevents is an exact multiple.
    transferedlist = [
        m.model.predict(normedinputs_source[start:start + minibatch])
        for start in range(0, nevents, minibatch)
    ]

    transfered_data = np.vstack(transferedlist)
    # Undo the standardisation on the float-feature columns using the source constants.
    transfered_data = transfered_data * inputsigma_source[:, :inputdim] + inputmeans_source[:, :inputdim]
    ntransfered_data = transfered_data.shape[0]
    print(ntransfered_data)

    select_data_source_transfered[r].append(transfered_data)



In [None]:
# Each entry: [axis label, column name, range low, range high, number of bins].
labelsindices = [['H_mass', 'H_mass', 105., 170, 100], 
                 ['H_pt', 'H_pt', 0.0, 100., 100],
                 ['Z_pt', 'Z_pt', 0.0, 100., 100], 
                 ['Z_eta', 'Z_eta', -4., 4., 50],
                 ['Z_phi', 'Z_phi', -4., 4., 50],
                 ['gamma_pt', 'gamma_pt', 0., 50., 100],
                 ['gamma_eta', 'gamma_eta', -4., 4., 50],
                 ['gamma_phi', 'gamma_phi', -4., 4., 50],
                 ['gamma_mvaID', 'gamma_mvaID', -1.,1., 50],
                 ['n_jets', 'n_jets', 0., 5., 5],
                 ['n_leptons', 'n_leptons', 0., 6., 6]]
runplots = True

if runplots:
    # Only variables that were loaded through featurevars exist in the data;
    # looking up any other name would raise ValueError.
    plottable = [li for li in labelsindices if li[1] in featurevars]
    skipped = [li[1] for li in labelsindices if li[1] not in featurevars]
    if skipped:
        print("Not in featurevars, skipped:", skipped)

    yscales = ['log', 'linear']
    for yscale in yscales:
        for li in plottable:
            pos = featurevars.index(li[1])
            nbins = li[-1]

            # One panel per region on a 2x2 grid.
            fig, axes = plt.subplots(2, 2)

            for r, ax in zip(plottextlist, axes.flat):
                target_data = rawinputs_target[select_data_target[r]]
                source_data = rawinputs_source[select_data_source[r]]

                ax.hist(target_data[li[1]], bins=nbins, alpha=0.5, range=(li[2], li[3]), histtype='bar', density=True, label='target')

                # The transfer step may have been run for a subset of regions
                # only; draw the overlay just where output exists.
                transfered = select_data_source_transfered.get(r)
                if transfered:
                    source_transfered_data = transfered[0]
                    hist1, bins = np.histogram(source_transfered_data[:, pos], bins=nbins, range=(li[2], li[3]), density=True)
                    # Rescale the density back to counts to get sqrt(N)
                    # errors, then express them in density units again.
                    scale = len(source_transfered_data[:, pos]) / sum(hist1)
                    err = np.sqrt(hist1 * scale) / scale
                    center = (bins[:-1] + bins[1:]) / 2
                    ax.errorbar(center, hist1, yerr=err, fmt='.', c='r', markersize=8, capthick=0, label='transfermed')

                ax.hist(source_data[li[1]], bins=nbins, alpha=0.5, range=(li[2], li[3]), histtype='step', density=True, label='source')
                # Axes objects use the set_* methods; yscale/title/xlabel
                # exist only as pyplot functions.
                ax.set_yscale(yscale)
                ax.set_title(r)
                ax.set_xlabel(li[1])
                ax.legend(loc="upper right")

            fig.tight_layout()
            plt.show()
            plt.close(fig)

In [1]:
import HZGammaAna

ModuleNotFoundError: No module named 'HZGammaAna'

In [2]:
# Settings forwarded to HZGammaAna.train_and_validate below.
LRrange3 = [1e-5, 1e-5, 20000, 0]
nafdim, depth = 128, 2
batchsize, seed = 512, 101

HZGammaAna.train_and_validate(
    steps=200,
    LRrange=LRrange3,
    minibatch=batchsize,
    beta1=0.9,
    beta2=0.999,
    savedir='test/',
    nafdim=nafdim,
    depth=depth,
    seed=seed,
    permute=True,
    retrain=False,
    train=True,
)


[0, 0, 5]
Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 7)]          0                                            
__________________________________________________________________________________________________
tf.__operators__.getitem_1 (Sli (None, 2)            0           input_2[0][0]                    
__________________________________________________________________________________________________
tf.convert_to_tensor (TFOpLambd (None, 2)            0           tf.__operators__.getitem_1[0][0] 
__________________________________________________________________________________________________
tf.compat.v1.gather (TFOpLambda (None, 2)            0           tf.convert_to_tensor[0][0]       
____________________________________________________________________________________

: 

: 

: 

In [None]:
import matplotlib.pyplot as plt

plottextlist=['SR','CR1','CR2','CR3']
xmax = np.reshape(inputmeans_source + 5* inputsigma_source, inputmeans_source.shape[1])
# Each entry: [axis label, column name, range low, range high, number of bins];
# the ranges are in units of the normalised inputs plotted below.
labelsindices_unnorm = [['H_mass', 'H_mass', -2., 4, 50], 
                 ['H_pt', 'H_pt', -2., 4., 50],
                 ['Z_pt', 'Z_pt', -2., 4, 100], 
                 #['Z_eta', 'Z_eta', -4., 4., 50],
                 #['Z_phi', 'Z_phi', -4., 4., 50],
                 ['gamma_pt', 'gamma_pt', -2., 4, 100],
                 #['gamma_eta', 'gamma_eta', -4., 4., 50],
                 #['gamma_phi', 'gamma_phi', -4., 4., 50],
                 ['gamma_mvaID', 'gamma_mvaID', -2., 4, 50],
                 ['n_jets', 'n_jets', -2., 4, 5],
                 ['n_leptons', 'n_leptons', -2., 4, 6]]

for li in labelsindices_unnorm:
    # Variables that were never loaded through featurevars cannot be plotted;
    # featurevars.index() would raise ValueError for them.
    if li[1] not in featurevars:
        print("Not in featurevars, skipped:", li[1])
        continue
    # NOTE(review): pos is used as a column of the encoded array, which holds
    # as long as no categorical feature precedes this one in featurevars.
    pos = featurevars.index(li[1])
    nbins = li[-1]

    # One panel per region on a 2x2 grid.
    fig, axes = plt.subplots(2, 2)
    for r, ax in zip(plottextlist, axes.flat):
        ax.hist(normedinputs_target[select_data_target[r]][:,pos], bins=nbins, range=(li[2], li[3]), alpha=0.5, histtype='bar', density=True, label='target')
        ax.hist(normedinputs_source[select_data_source[r]][:,pos], bins=nbins, range=(li[2], li[3]), alpha=0.5, histtype='step', density=True, label='source')

        ax.set_yscale('linear')
        ax.set_title(r)
        # Label the x axis with the plotted variable; the region is already the title.
        ax.set_xlabel(li[0])
        ax.legend(loc="upper right")

    fig.tight_layout()
    plt.show()
    plt.close(fig)

In [None]:
from scipy.stats import f

def getprob(chi2_1, chi2_2, n1, n2, nbins=260.):
    """Return the F-test p-value for going from n1 to n2 fit parameters.

    The statistic is
    ``F12 = ((chi2_1 - chi2_2) / (n2 - n1)) / (chi2_2 / (nbins - n2))``
    and the returned value is the upper-tail probability of an F distribution
    with ``(n2 - n1, nbins - n2)`` degrees of freedom evaluated at ``F12``.
    ``F12`` is also printed.

    Parameters
    ----------
    chi2_1, chi2_2 : float
        Chi-square of the fit with ``n1`` and with ``n2`` parameters.
    n1, n2 : int
        Number of parameters of the two fits; ``n2`` must exceed ``n1``.
    nbins : float, optional
        Number of bins entering the chi-square; defaults to 260.

    Raises
    ------
    ValueError
        If ``n2 <= n1`` or ``nbins <= n2`` (non-positive degrees of freedom).
    """
    dn = n2 - n1
    dof2 = nbins - n2
    if dn <= 0 or dof2 <= 0:
        raise ValueError(
            "need n2 > n1 and nbins > n2, got n1=%r, n2=%r, nbins=%r" % (n1, n2, nbins)
        )

    F12 = ((chi2_1 - chi2_2) / dn) / (chi2_2 / dof2)
    print("F12:", F12)

    # The survival function is the same quantity as 1 - cdf but does not
    # round to zero when the p-value is far below machine epsilon.
    return f.sf(F12, dn, dof2)

#getprob(3412.73, 344.214, 6,7)

#chis = [3412.73, 344.214, 271.932, 267.408] #untag4
#chis = [3907.04, 392.668, 255.96, 255.946] #untag3
#chis = [4919.99, 441.402, 264.348, 260.458] #untag2
#chis = [2701.9, 359.557, 267.88, 267.932] #untag1

#chis = [3369.1, 341.605, 271.545, 266.973] #untag4
#chis = [3816.55, 388.357, 255.601, 255.281] #untag3
#chis = [4752.9, 428.133, 262.87, 258.668] #untag2
#chis = [2715.07, 346.441, 266.652, 266.519] #untag2

## twojet NLL
#chis = [284.721, 278.601, 284.607] #untag2
#chis = [165.349, 166.681] #untag2
#chis = [141.778, 142.048, 141.887] #untag2
# Chi2
#chis = [226.943, 219.101, 219.03] #cat0
#chis = [138.53, 137.928, 137.928] #cat1
#chis = [169.473, 170.424] #cat2
#chis = [289.029, 287.991, 292.646] #cat3

#chis = [2870.45, 358.337, 268.227, 268.608] #cat3
#chis = [276.002, 229.783, 229.239] #cat3
#chis = [351.683, 227.22, 225.7] #cat2
chis = [520.252, 354.797, 355.799] #cat1
#chis = [759.219, 622.462, 609.966, 607.806] #cat0

# Test each fit against the next one in the list; the parameter counts passed
# to getprob start at 5 -> 6 and increase by one per step.
for i, (chi2_lower, chi2_higher) in enumerate(zip(chis, chis[1:])):
    print(getprob(chi2_lower, chi2_higher, i + 5, i + 6))