In [1]:
%load_ext autoreload
%autoreload 2

import helper_functions as hf
from imports import *
import importlib
from sklearn.decomposition import PCA

# Count the CPU cores reported by the OS; the same number is used further down
# as torch's intra-op thread count.
num_available_cpus = multiprocessing.cpu_count()
print("Number of available CPUs:", num_available_cpus)

# Drop any cached GPU allocations, then select the first CUDA device when one is
# usable and fall back to the CPU otherwise.
torch.cuda.empty_cache()
has_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if has_cuda else "cpu")
print("Device =", device)

# With CUDA present, newly created tensors default to CUDA float tensors;
# without it, just report that the CPU is being used.
if has_cuda:
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
else:
    print ('cpu')

# Let torch use every available core for intra-op parallelism and report the
# resulting thread settings.
torch.set_num_threads(num_available_cpus)

print("Number of threads:", torch.get_num_threads())
print("Number of interop threads:", torch.get_num_interop_threads())

  from .autonotebook import tqdm as notebook_tqdm


Number of available CPUs: 80
Device = cuda:0
Number of threads: 80
Number of interop threads: 80


In [2]:
# Background flow model plus per-feature reference statistics of the background sample.
bkgName = "QCDBKG"
bkg_flow = hf.load_model(name="QCDBKG_clip10_NSRATQUAD_k6_hf120_nbpl4_tb10.pt")

# NOTE(review): 35 batches are requested here, while the evaluation cells further
# down iterate over 36 batch files -- confirm that n_bkg_tot is meant to be based on 35.
bkg_data, bkg_unnorm, bkg_mass = hf.load_full("QCDBKG",num_batches=35)

# Number of rows in the loaded background array, and the mean / standard deviation
# of the un-normalised array taken along axis 0.
n_bkg_tot = bkg_data.shape[0]
bkg_mean, bkg_std = (stat(bkg_unnorm,axis=0) for stat in (np.mean, np.std))

# Only the summary numbers are needed from here on; release the full arrays.
del bkg_data, bkg_unnorm, bkg_mass

In [3]:
# Load each UL17 signal sample into dictionaries keyed by sample name.
# bkg_mean / bkg_std are handed to load_full (presumably so signals get the same
# normalisation as the background -- confirm in helper_functions).
signals_UL17 = ["Qstar2000_W400_UL17","Wp3000_B400_UL17","XYY_X3000_Y80_UL17"]
sig_datas, sig_unnorms, sig_masses = {}, {}, {}
for s in signals_UL17:
    sig_datas[s], sig_unnorms[s], sig_mass = hf.load_full(s,bkg_mean=bkg_mean,bkg_std=bkg_std)
    # Only column 0 of the returned mass array is kept.
    sig_masses[s] = sig_mass[:,0]

In [4]:
# Build a dictionary with each signal's preselection efficiency and the total number
# of QCD background events, then write it to data_forStats_mjjDecorrelate/info.json.
from samples import sample_library_UL17, bkg_loc
info = {}
for s in signals_UL17:
    with h5py.File(sample_library_UL17[s],"r") as f:
        eff = f['preselection_eff'][()]
        # Element 0 is a NumPy scalar; json.dump only accepts it when it happens to be
        # a float64, so convert to a builtin float to make the dump dtype-independent.
        info[s] = float(eff[0])
info['n_bkg_tot'] = n_bkg_tot

# This cell can run before anything else has written to the output directory
# (the evaluation cells come later), so create it here if it is missing.
os.makedirs("data_forStats_mjjDecorrelate",exist_ok=True)
with open("data_forStats_mjjDecorrelate/info.json","w") as f:
    json.dump(info,f,indent=4)

In [5]:
# Qstar2000_W400_UL17 training
sigName = "Qstar2000_W400_UL17"
# The checkpoint name is the training tag followed by the suffix shared by the flows in this notebook.
sigFlowName = "{0}_clip10_NSRATQUAD_k6_hf120_nbpl4_tb10.pt".format(sigName)
sig_flow = hf.load_model(name=sigFlowName)

n_bkg_files = 36  # background batches 0..35 feed both the PCA fit and the evaluation
outDir = "data_forStats_mjjDecorrelate"

# Fit the PCA on all n_bkg_files background batches. Each event contributes one row:
# (mass, loss under the signal-trained flow, loss under the background-trained flow).
# Per-batch pieces are gathered in lists and joined once, instead of re-copying an
# ever-growing array with np.concatenate on every iteration.
mass_parts, sigtr_parts, bkgtr_parts = [], [], []
for i in range(n_bkg_files):
    bkg_data, bkg_unnorm_data, bkg_mass = hf.load_bkg_batch("QCDBKG",i)
    # Copy column 0 so the full mass array can actually be freed by the del below.
    mass_parts.append(np.array(bkg_mass[:,0]))
    # Loss = negated element [0] of what eval_log_prob returns.
    bkgtr_parts.append(-bkg_flow.eval_log_prob(bkg_data)[0])
    sigtr_parts.append(-sig_flow.eval_log_prob(bkg_data)[0])
    del bkg_mass, bkg_data, bkg_unnorm_data

# The leading empty float64 array keeps every column 1-D float64 whatever dtype the loaders return.
combined_bkg = np.column_stack([np.concatenate([np.array([])] + parts)
                                for parts in (mass_parts, sigtr_parts, bkgtr_parts)])
pca = PCA().fit(combined_bkg)
del combined_bkg, mass_parts, sigtr_parts, bkgtr_parts

# eval on backgrounds: the fitted PCA is handed to evalAndSave as `transform`
for i in range(n_bkg_files):
    bkg_data, bkg_unnorm_data, bkg_mass = hf.load_bkg_batch("QCDBKG",i)
    bkg_mass = bkg_mass[:,0]
    hf.evalAndSave(sig_flow,sigName,bkg_flow,bkgName,bkg_data,bkg_mass,"QCDBKG_{0}".format(i),0,transform=pca,outdir=outDir)
    del bkg_mass, bkg_data, bkg_unnorm_data

# loop over all signals and evaluate (signals pass 1 where the background calls pass 0)
for s in signals_UL17:
    hf.evalAndSave(sig_flow,sigName,bkg_flow,bkgName,sig_datas[s],sig_masses[s],s,1,transform=pca,outdir=outDir)

# load data files back and make 2D quak space plots
dirName = "{0}/sigTrain{1}_bkgTrain{2}".format(outDir,sigName,bkgName)
sigtr_parts, bkgtr_parts = [], []
# os.listdir returns entries in arbitrary order; sorting makes the concatenation order reproducible.
for fname in sorted(os.listdir(dirName)):
    if "QCDBKG" in fname:
        arr = np.load(os.path.join(dirName,fname))
        # Row 1 is used as the signal-trained loss and row 2 as the background-trained loss;
        # copying the rows lets `arr` be released right away.
        sigtr_parts.append(np.array(arr[1]))
        bkgtr_parts.append(np.array(arr[2]))
        del arr
# The empty float64 seed keeps the result float64 and yields an empty array if no file matched.
bkg_sigtr_losses = np.concatenate([np.array([])] + sigtr_parts)
bkg_bkgtr_losses = np.concatenate([np.array([])] + bkgtr_parts)
del sigtr_parts, bkgtr_parts

for s in signals_UL17:
    sig_eval = np.load(os.path.join(dirName,"eval_{0}.npy".format(s)))
    sig_sigtr_losses = sig_eval[1]
    sig_bkgtr_losses = sig_eval[2]
    hf.sig_vs_bkg_2DQuakSpace(sigName,bkgName,
                              sig_bkgtr_losses,sig_sigtr_losses,
                              bkg_bkgtr_losses,bkg_sigtr_losses,
                              s,mjjDecorr=True)
    del sig_sigtr_losses, sig_bkgtr_losses, sig_eval

del bkg_bkgtr_losses, bkg_sigtr_losses

In [6]:
# Qstar2000_W400_UL17 + Wp3000_B400_UL17 training
sigName = "Qstar2000_W400_UL17-and-Wp3000_B400_UL17"
# The checkpoint name is the training tag followed by the suffix shared by the flows in this notebook.
sigFlowName = "{0}_clip10_NSRATQUAD_k6_hf120_nbpl4_tb10.pt".format(sigName)
sig_flow = hf.load_model(name=sigFlowName)

n_bkg_files = 36  # background batches 0..35 feed both the PCA fit and the evaluation
outDir = "data_forStats_mjjDecorrelate"

# Fit the PCA on all n_bkg_files background batches. Each event contributes one row:
# (mass, loss under the signal-trained flow, loss under the background-trained flow).
# Per-batch pieces are gathered in lists and joined once, instead of re-copying an
# ever-growing array with np.concatenate on every iteration.
mass_parts, sigtr_parts, bkgtr_parts = [], [], []
for i in range(n_bkg_files):
    bkg_data, bkg_unnorm_data, bkg_mass = hf.load_bkg_batch("QCDBKG",i)
    # Copy column 0 so the full mass array can actually be freed by the del below.
    mass_parts.append(np.array(bkg_mass[:,0]))
    # Loss = negated element [0] of what eval_log_prob returns.
    bkgtr_parts.append(-bkg_flow.eval_log_prob(bkg_data)[0])
    sigtr_parts.append(-sig_flow.eval_log_prob(bkg_data)[0])
    del bkg_mass, bkg_data, bkg_unnorm_data

# The leading empty float64 array keeps every column 1-D float64 whatever dtype the loaders return.
combined_bkg = np.column_stack([np.concatenate([np.array([])] + parts)
                                for parts in (mass_parts, sigtr_parts, bkgtr_parts)])
pca = PCA().fit(combined_bkg)
del combined_bkg, mass_parts, sigtr_parts, bkgtr_parts

# eval on backgrounds: the fitted PCA is handed to evalAndSave as `transform`
for i in range(n_bkg_files):
    bkg_data, bkg_unnorm_data, bkg_mass = hf.load_bkg_batch("QCDBKG",i)
    bkg_mass = bkg_mass[:,0]
    hf.evalAndSave(sig_flow,sigName,bkg_flow,bkgName,bkg_data,bkg_mass,"QCDBKG_{0}".format(i),0,transform=pca,outdir=outDir)
    del bkg_mass, bkg_data, bkg_unnorm_data

# loop over all signals and evaluate (signals pass 1 where the background calls pass 0)
for s in signals_UL17:
    hf.evalAndSave(sig_flow,sigName,bkg_flow,bkgName,sig_datas[s],sig_masses[s],s,1,transform=pca,outdir=outDir)

# load data files back and make 2D quak space plots
dirName = "{0}/sigTrain{1}_bkgTrain{2}".format(outDir,sigName,bkgName)
sigtr_parts, bkgtr_parts = [], []
# os.listdir returns entries in arbitrary order; sorting makes the concatenation order reproducible.
for fname in sorted(os.listdir(dirName)):
    if "QCDBKG" in fname:
        arr = np.load(os.path.join(dirName,fname))
        # Row 1 is used as the signal-trained loss and row 2 as the background-trained loss;
        # copying the rows lets `arr` be released right away.
        sigtr_parts.append(np.array(arr[1]))
        bkgtr_parts.append(np.array(arr[2]))
        del arr
# The empty float64 seed keeps the result float64 and yields an empty array if no file matched.
bkg_sigtr_losses = np.concatenate([np.array([])] + sigtr_parts)
bkg_bkgtr_losses = np.concatenate([np.array([])] + bkgtr_parts)
del sigtr_parts, bkgtr_parts

for s in signals_UL17:
    sig_eval = np.load(os.path.join(dirName,"eval_{0}.npy".format(s)))
    sig_sigtr_losses = sig_eval[1]
    sig_bkgtr_losses = sig_eval[2]
    hf.sig_vs_bkg_2DQuakSpace(sigName,bkgName,
                              sig_bkgtr_losses,sig_sigtr_losses,
                              bkg_bkgtr_losses,bkg_sigtr_losses,
                              s,mjjDecorr=True)
    del sig_sigtr_losses, sig_bkgtr_losses, sig_eval

del bkg_bkgtr_losses, bkg_sigtr_losses

In [7]:
# Qstar2000_W400_UL17 + Wp3000_B400_UL17 + XYY_X3000_Y80_UL17 training
sigName = "Qstar2000_W400_UL17-and-Wp3000_B400_UL17-and-XYY_X3000_Y80_UL17"
# The checkpoint name is the training tag followed by the suffix shared by the flows in this notebook.
sigFlowName = "{0}_clip10_NSRATQUAD_k6_hf120_nbpl4_tb10.pt".format(sigName)
sig_flow = hf.load_model(name=sigFlowName)

n_bkg_files = 36  # background batches 0..35 feed both the PCA fit and the evaluation
outDir = "data_forStats_mjjDecorrelate"

# Fit the PCA on all n_bkg_files background batches. Each event contributes one row:
# (mass, loss under the signal-trained flow, loss under the background-trained flow).
# Per-batch pieces are gathered in lists and joined once, instead of re-copying an
# ever-growing array with np.concatenate on every iteration.
mass_parts, sigtr_parts, bkgtr_parts = [], [], []
for i in range(n_bkg_files):
    bkg_data, bkg_unnorm_data, bkg_mass = hf.load_bkg_batch("QCDBKG",i)
    # Copy column 0 so the full mass array can actually be freed by the del below.
    mass_parts.append(np.array(bkg_mass[:,0]))
    # Loss = negated element [0] of what eval_log_prob returns.
    bkgtr_parts.append(-bkg_flow.eval_log_prob(bkg_data)[0])
    sigtr_parts.append(-sig_flow.eval_log_prob(bkg_data)[0])
    del bkg_mass, bkg_data, bkg_unnorm_data

# The leading empty float64 array keeps every column 1-D float64 whatever dtype the loaders return.
combined_bkg = np.column_stack([np.concatenate([np.array([])] + parts)
                                for parts in (mass_parts, sigtr_parts, bkgtr_parts)])
pca = PCA().fit(combined_bkg)
del combined_bkg, mass_parts, sigtr_parts, bkgtr_parts

# eval on backgrounds: the fitted PCA is handed to evalAndSave as `transform`
for i in range(n_bkg_files):
    bkg_data, bkg_unnorm_data, bkg_mass = hf.load_bkg_batch("QCDBKG",i)
    bkg_mass = bkg_mass[:,0]
    hf.evalAndSave(sig_flow,sigName,bkg_flow,bkgName,bkg_data,bkg_mass,"QCDBKG_{0}".format(i),0,transform=pca,outdir=outDir)
    del bkg_mass, bkg_data, bkg_unnorm_data

# loop over all signals and evaluate (signals pass 1 where the background calls pass 0)
for s in signals_UL17:
    hf.evalAndSave(sig_flow,sigName,bkg_flow,bkgName,sig_datas[s],sig_masses[s],s,1,transform=pca,outdir=outDir)

# load data files back and make 2D quak space plots
dirName = "{0}/sigTrain{1}_bkgTrain{2}".format(outDir,sigName,bkgName)
sigtr_parts, bkgtr_parts = [], []
# os.listdir returns entries in arbitrary order; sorting makes the concatenation order reproducible.
for fname in sorted(os.listdir(dirName)):
    if "QCDBKG" in fname:
        arr = np.load(os.path.join(dirName,fname))
        # Row 1 is used as the signal-trained loss and row 2 as the background-trained loss;
        # copying the rows lets `arr` be released right away.
        sigtr_parts.append(np.array(arr[1]))
        bkgtr_parts.append(np.array(arr[2]))
        del arr
# The empty float64 seed keeps the result float64 and yields an empty array if no file matched.
bkg_sigtr_losses = np.concatenate([np.array([])] + sigtr_parts)
bkg_bkgtr_losses = np.concatenate([np.array([])] + bkgtr_parts)
del sigtr_parts, bkgtr_parts

for s in signals_UL17:
    sig_eval = np.load(os.path.join(dirName,"eval_{0}.npy".format(s)))
    sig_sigtr_losses = sig_eval[1]
    sig_bkgtr_losses = sig_eval[2]
    hf.sig_vs_bkg_2DQuakSpace(sigName,bkgName,
                              sig_bkgtr_losses,sig_sigtr_losses,
                              bkg_bkgtr_losses,bkg_sigtr_losses,
                              s,mjjDecorr=True)
    del sig_sigtr_losses, sig_bkgtr_losses, sig_eval

del bkg_bkgtr_losses, bkg_sigtr_losses

In [8]:
# Qstar2000_W400_UL17 + XYY_X3000_Y80_UL17 training
sigName = "Qstar2000_W400_UL17-and-XYY_X3000_Y80_UL17"
# The checkpoint name is the training tag followed by the suffix shared by the flows in this notebook.
sigFlowName = "{0}_clip10_NSRATQUAD_k6_hf120_nbpl4_tb10.pt".format(sigName)
sig_flow = hf.load_model(name=sigFlowName)

n_bkg_files = 36  # background batches 0..35 feed both the PCA fit and the evaluation
outDir = "data_forStats_mjjDecorrelate"

# Fit the PCA on all n_bkg_files background batches. Each event contributes one row:
# (mass, loss under the signal-trained flow, loss under the background-trained flow).
# Per-batch pieces are gathered in lists and joined once, instead of re-copying an
# ever-growing array with np.concatenate on every iteration.
mass_parts, sigtr_parts, bkgtr_parts = [], [], []
for i in range(n_bkg_files):
    bkg_data, bkg_unnorm_data, bkg_mass = hf.load_bkg_batch("QCDBKG",i)
    # Copy column 0 so the full mass array can actually be freed by the del below.
    mass_parts.append(np.array(bkg_mass[:,0]))
    # Loss = negated element [0] of what eval_log_prob returns.
    bkgtr_parts.append(-bkg_flow.eval_log_prob(bkg_data)[0])
    sigtr_parts.append(-sig_flow.eval_log_prob(bkg_data)[0])
    del bkg_mass, bkg_data, bkg_unnorm_data

# The leading empty float64 array keeps every column 1-D float64 whatever dtype the loaders return.
combined_bkg = np.column_stack([np.concatenate([np.array([])] + parts)
                                for parts in (mass_parts, sigtr_parts, bkgtr_parts)])
pca = PCA().fit(combined_bkg)
del combined_bkg, mass_parts, sigtr_parts, bkgtr_parts

# eval on backgrounds: the fitted PCA is handed to evalAndSave as `transform`
for i in range(n_bkg_files):
    bkg_data, bkg_unnorm_data, bkg_mass = hf.load_bkg_batch("QCDBKG",i)
    bkg_mass = bkg_mass[:,0]
    hf.evalAndSave(sig_flow,sigName,bkg_flow,bkgName,bkg_data,bkg_mass,"QCDBKG_{0}".format(i),0,transform=pca,outdir=outDir)
    del bkg_mass, bkg_data, bkg_unnorm_data

# loop over all signals and evaluate (signals pass 1 where the background calls pass 0)
for s in signals_UL17:
    hf.evalAndSave(sig_flow,sigName,bkg_flow,bkgName,sig_datas[s],sig_masses[s],s,1,transform=pca,outdir=outDir)

# load data files back and make 2D quak space plots
dirName = "{0}/sigTrain{1}_bkgTrain{2}".format(outDir,sigName,bkgName)
sigtr_parts, bkgtr_parts = [], []
# os.listdir returns entries in arbitrary order; sorting makes the concatenation order reproducible.
for fname in sorted(os.listdir(dirName)):
    if "QCDBKG" in fname:
        arr = np.load(os.path.join(dirName,fname))
        # Row 1 is used as the signal-trained loss and row 2 as the background-trained loss;
        # copying the rows lets `arr` be released right away.
        sigtr_parts.append(np.array(arr[1]))
        bkgtr_parts.append(np.array(arr[2]))
        del arr
# The empty float64 seed keeps the result float64 and yields an empty array if no file matched.
bkg_sigtr_losses = np.concatenate([np.array([])] + sigtr_parts)
bkg_bkgtr_losses = np.concatenate([np.array([])] + bkgtr_parts)
del sigtr_parts, bkgtr_parts

for s in signals_UL17:
    sig_eval = np.load(os.path.join(dirName,"eval_{0}.npy".format(s)))
    sig_sigtr_losses = sig_eval[1]
    sig_bkgtr_losses = sig_eval[2]
    hf.sig_vs_bkg_2DQuakSpace(sigName,bkgName,
                              sig_bkgtr_losses,sig_sigtr_losses,
                              bkg_bkgtr_losses,bkg_sigtr_losses,
                              s,mjjDecorr=True)
    del sig_sigtr_losses, sig_bkgtr_losses, sig_eval

del bkg_bkgtr_losses, bkg_sigtr_losses

In [9]:
# Wp3000_B400_UL17 training
sigName = "Wp3000_B400_UL17"
# The checkpoint name is the training tag followed by the suffix shared by the flows in this notebook.
sigFlowName = "{0}_clip10_NSRATQUAD_k6_hf120_nbpl4_tb10.pt".format(sigName)
sig_flow = hf.load_model(name=sigFlowName)

n_bkg_files = 36  # background batches 0..35 feed both the PCA fit and the evaluation
outDir = "data_forStats_mjjDecorrelate"

# Fit the PCA on all n_bkg_files background batches. Each event contributes one row:
# (mass, loss under the signal-trained flow, loss under the background-trained flow).
# Per-batch pieces are gathered in lists and joined once, instead of re-copying an
# ever-growing array with np.concatenate on every iteration.
mass_parts, sigtr_parts, bkgtr_parts = [], [], []
for i in range(n_bkg_files):
    bkg_data, bkg_unnorm_data, bkg_mass = hf.load_bkg_batch("QCDBKG",i)
    # Copy column 0 so the full mass array can actually be freed by the del below.
    mass_parts.append(np.array(bkg_mass[:,0]))
    # Loss = negated element [0] of what eval_log_prob returns.
    bkgtr_parts.append(-bkg_flow.eval_log_prob(bkg_data)[0])
    sigtr_parts.append(-sig_flow.eval_log_prob(bkg_data)[0])
    del bkg_mass, bkg_data, bkg_unnorm_data

# The leading empty float64 array keeps every column 1-D float64 whatever dtype the loaders return.
combined_bkg = np.column_stack([np.concatenate([np.array([])] + parts)
                                for parts in (mass_parts, sigtr_parts, bkgtr_parts)])
pca = PCA().fit(combined_bkg)
del combined_bkg, mass_parts, sigtr_parts, bkgtr_parts

# eval on backgrounds: the fitted PCA is handed to evalAndSave as `transform`
for i in range(n_bkg_files):
    bkg_data, bkg_unnorm_data, bkg_mass = hf.load_bkg_batch("QCDBKG",i)
    bkg_mass = bkg_mass[:,0]
    hf.evalAndSave(sig_flow,sigName,bkg_flow,bkgName,bkg_data,bkg_mass,"QCDBKG_{0}".format(i),0,transform=pca,outdir=outDir)
    del bkg_mass, bkg_data, bkg_unnorm_data

# loop over all signals and evaluate (signals pass 1 where the background calls pass 0)
for s in signals_UL17:
    hf.evalAndSave(sig_flow,sigName,bkg_flow,bkgName,sig_datas[s],sig_masses[s],s,1,transform=pca,outdir=outDir)

# load data files back and make 2D quak space plots
dirName = "{0}/sigTrain{1}_bkgTrain{2}".format(outDir,sigName,bkgName)
sigtr_parts, bkgtr_parts = [], []
# os.listdir returns entries in arbitrary order; sorting makes the concatenation order reproducible.
for fname in sorted(os.listdir(dirName)):
    if "QCDBKG" in fname:
        arr = np.load(os.path.join(dirName,fname))
        # Row 1 is used as the signal-trained loss and row 2 as the background-trained loss;
        # copying the rows lets `arr` be released right away.
        sigtr_parts.append(np.array(arr[1]))
        bkgtr_parts.append(np.array(arr[2]))
        del arr
# The empty float64 seed keeps the result float64 and yields an empty array if no file matched.
bkg_sigtr_losses = np.concatenate([np.array([])] + sigtr_parts)
bkg_bkgtr_losses = np.concatenate([np.array([])] + bkgtr_parts)
del sigtr_parts, bkgtr_parts

for s in signals_UL17:
    sig_eval = np.load(os.path.join(dirName,"eval_{0}.npy".format(s)))
    sig_sigtr_losses = sig_eval[1]
    sig_bkgtr_losses = sig_eval[2]
    hf.sig_vs_bkg_2DQuakSpace(sigName,bkgName,
                              sig_bkgtr_losses,sig_sigtr_losses,
                              bkg_bkgtr_losses,bkg_sigtr_losses,
                              s,mjjDecorr=True)
    del sig_sigtr_losses, sig_bkgtr_losses, sig_eval

del bkg_bkgtr_losses, bkg_sigtr_losses

In [10]:
# Wp3000_B400_UL17 + XYY_X3000_Y80_UL17 training
sigName = "Wp3000_B400_UL17-and-XYY_X3000_Y80_UL17"
# The checkpoint name is the training tag followed by the suffix shared by the flows in this notebook.
sigFlowName = "{0}_clip10_NSRATQUAD_k6_hf120_nbpl4_tb10.pt".format(sigName)
sig_flow = hf.load_model(name=sigFlowName)

n_bkg_files = 36  # background batches 0..35 feed both the PCA fit and the evaluation
outDir = "data_forStats_mjjDecorrelate"

# Fit the PCA on all n_bkg_files background batches. Each event contributes one row:
# (mass, loss under the signal-trained flow, loss under the background-trained flow).
# Per-batch pieces are gathered in lists and joined once, instead of re-copying an
# ever-growing array with np.concatenate on every iteration.
mass_parts, sigtr_parts, bkgtr_parts = [], [], []
for i in range(n_bkg_files):
    bkg_data, bkg_unnorm_data, bkg_mass = hf.load_bkg_batch("QCDBKG",i)
    # Copy column 0 so the full mass array can actually be freed by the del below.
    mass_parts.append(np.array(bkg_mass[:,0]))
    # Loss = negated element [0] of what eval_log_prob returns.
    bkgtr_parts.append(-bkg_flow.eval_log_prob(bkg_data)[0])
    sigtr_parts.append(-sig_flow.eval_log_prob(bkg_data)[0])
    del bkg_mass, bkg_data, bkg_unnorm_data

# The leading empty float64 array keeps every column 1-D float64 whatever dtype the loaders return.
combined_bkg = np.column_stack([np.concatenate([np.array([])] + parts)
                                for parts in (mass_parts, sigtr_parts, bkgtr_parts)])
pca = PCA().fit(combined_bkg)
del combined_bkg, mass_parts, sigtr_parts, bkgtr_parts

# eval on backgrounds: the fitted PCA is handed to evalAndSave as `transform`
for i in range(n_bkg_files):
    bkg_data, bkg_unnorm_data, bkg_mass = hf.load_bkg_batch("QCDBKG",i)
    bkg_mass = bkg_mass[:,0]
    hf.evalAndSave(sig_flow,sigName,bkg_flow,bkgName,bkg_data,bkg_mass,"QCDBKG_{0}".format(i),0,transform=pca,outdir=outDir)
    del bkg_mass, bkg_data, bkg_unnorm_data

# loop over all signals and evaluate (signals pass 1 where the background calls pass 0)
for s in signals_UL17:
    hf.evalAndSave(sig_flow,sigName,bkg_flow,bkgName,sig_datas[s],sig_masses[s],s,1,transform=pca,outdir=outDir)

# load data files back and make 2D quak space plots
dirName = "{0}/sigTrain{1}_bkgTrain{2}".format(outDir,sigName,bkgName)
sigtr_parts, bkgtr_parts = [], []
# os.listdir returns entries in arbitrary order; sorting makes the concatenation order reproducible.
for fname in sorted(os.listdir(dirName)):
    if "QCDBKG" in fname:
        arr = np.load(os.path.join(dirName,fname))
        # Row 1 is used as the signal-trained loss and row 2 as the background-trained loss;
        # copying the rows lets `arr` be released right away.
        sigtr_parts.append(np.array(arr[1]))
        bkgtr_parts.append(np.array(arr[2]))
        del arr
# The empty float64 seed keeps the result float64 and yields an empty array if no file matched.
bkg_sigtr_losses = np.concatenate([np.array([])] + sigtr_parts)
bkg_bkgtr_losses = np.concatenate([np.array([])] + bkgtr_parts)
del sigtr_parts, bkgtr_parts

for s in signals_UL17:
    sig_eval = np.load(os.path.join(dirName,"eval_{0}.npy".format(s)))
    sig_sigtr_losses = sig_eval[1]
    sig_bkgtr_losses = sig_eval[2]
    hf.sig_vs_bkg_2DQuakSpace(sigName,bkgName,
                              sig_bkgtr_losses,sig_sigtr_losses,
                              bkg_bkgtr_losses,bkg_sigtr_losses,
                              s,mjjDecorr=True)
    del sig_sigtr_losses, sig_bkgtr_losses, sig_eval

del bkg_bkgtr_losses, bkg_sigtr_losses

In [11]:
# XYY_X3000_Y80_UL17 training
sigName = "XYY_X3000_Y80_UL17"
# The checkpoint name is the training tag followed by the suffix shared by the flows in this notebook.
sigFlowName = "{0}_clip10_NSRATQUAD_k6_hf120_nbpl4_tb10.pt".format(sigName)
sig_flow = hf.load_model(name=sigFlowName)

n_bkg_files = 36  # background batches 0..35 feed both the PCA fit and the evaluation
outDir = "data_forStats_mjjDecorrelate"

# Fit the PCA on all n_bkg_files background batches. Each event contributes one row:
# (mass, loss under the signal-trained flow, loss under the background-trained flow).
# Per-batch pieces are gathered in lists and joined once, instead of re-copying an
# ever-growing array with np.concatenate on every iteration.
mass_parts, sigtr_parts, bkgtr_parts = [], [], []
for i in range(n_bkg_files):
    bkg_data, bkg_unnorm_data, bkg_mass = hf.load_bkg_batch("QCDBKG",i)
    # Copy column 0 so the full mass array can actually be freed by the del below.
    mass_parts.append(np.array(bkg_mass[:,0]))
    # Loss = negated element [0] of what eval_log_prob returns.
    bkgtr_parts.append(-bkg_flow.eval_log_prob(bkg_data)[0])
    sigtr_parts.append(-sig_flow.eval_log_prob(bkg_data)[0])
    del bkg_mass, bkg_data, bkg_unnorm_data

# The leading empty float64 array keeps every column 1-D float64 whatever dtype the loaders return.
combined_bkg = np.column_stack([np.concatenate([np.array([])] + parts)
                                for parts in (mass_parts, sigtr_parts, bkgtr_parts)])
pca = PCA().fit(combined_bkg)
del combined_bkg, mass_parts, sigtr_parts, bkgtr_parts

# eval on backgrounds: the fitted PCA is handed to evalAndSave as `transform`
for i in range(n_bkg_files):
    bkg_data, bkg_unnorm_data, bkg_mass = hf.load_bkg_batch("QCDBKG",i)
    bkg_mass = bkg_mass[:,0]
    hf.evalAndSave(sig_flow,sigName,bkg_flow,bkgName,bkg_data,bkg_mass,"QCDBKG_{0}".format(i),0,transform=pca,outdir=outDir)
    del bkg_mass, bkg_data, bkg_unnorm_data

# loop over all signals and evaluate (signals pass 1 where the background calls pass 0)
for s in signals_UL17:
    hf.evalAndSave(sig_flow,sigName,bkg_flow,bkgName,sig_datas[s],sig_masses[s],s,1,transform=pca,outdir=outDir)

# load data files back and make 2D quak space plots
dirName = "{0}/sigTrain{1}_bkgTrain{2}".format(outDir,sigName,bkgName)
sigtr_parts, bkgtr_parts = [], []
# os.listdir returns entries in arbitrary order; sorting makes the concatenation order reproducible.
for fname in sorted(os.listdir(dirName)):
    if "QCDBKG" in fname:
        arr = np.load(os.path.join(dirName,fname))
        # Row 1 is used as the signal-trained loss and row 2 as the background-trained loss;
        # copying the rows lets `arr` be released right away.
        sigtr_parts.append(np.array(arr[1]))
        bkgtr_parts.append(np.array(arr[2]))
        del arr
# The empty float64 seed keeps the result float64 and yields an empty array if no file matched.
bkg_sigtr_losses = np.concatenate([np.array([])] + sigtr_parts)
bkg_bkgtr_losses = np.concatenate([np.array([])] + bkgtr_parts)
del sigtr_parts, bkgtr_parts

for s in signals_UL17:
    sig_eval = np.load(os.path.join(dirName,"eval_{0}.npy".format(s)))
    sig_sigtr_losses = sig_eval[1]
    sig_bkgtr_losses = sig_eval[2]
    hf.sig_vs_bkg_2DQuakSpace(sigName,bkgName,
                              sig_bkgtr_losses,sig_sigtr_losses,
                              bkg_bkgtr_losses,bkg_sigtr_losses,
                              s,mjjDecorr=True)
    del sig_sigtr_losses, sig_bkgtr_losses, sig_eval

del bkg_bkgtr_losses, bkg_sigtr_losses