# Postselection 
This notebook features a simplified version of the postselection step of the VBS ssWW analysis with one lepton and one hadronic tau in the final state. Events are further filtered, the analysis proper is run, and the varied histograms of three variables (lepton_pt, tau_pt, m_jj) are computed and saved for each control region and each lepton flavour.

Import necessary libraries, setup remote reading and configure local/distributed execution.

In [1]:
%%capture

import ROOT
import os
from dask.distributed import Client
from utils import *
from variablesULbenchmark import *
from samplesUL import *
from distributed.diagnostics.plugin import UploadFile

# Grid environment: Rucio configuration, CA certificates and the user proxy.
os.environ["RUCIO_HOME"] = "/cvmfs/cms.cern.ch/rucio/current/"
os.environ["X509_CERT_DIR"] = "/cvmfs/grid.cern.ch/etc/grid-security/certificates/"
os.environ["X509_USER_PROXY"] = "/opt/workspace/persistent-storage/proxy"

# Analysis configuration. Only the variables listed here get histograms booked.
variables_to_plot = ["m_jj", "lepton_pt", "tau_pt"]
results_dir = "results_post_2"
e2e = True
#folder_e2e = "preselection_UL2017_v24_monitoring_remote_v2_O2_iter6"
folder_e2e = "preselection_workshop_3"
maxNfilespersample = 99999999  # lower this number just for debugging purposes

# Execution mode.
MT = False
distributed = True
nmaxpartitions = 92 * 3  # used only in distributed mode
sched_port = 24585  # used only in distributed mode

# Implicit multi-threading is enabled only for local (non-distributed) runs.
if MT and not distributed:
    ROOT.ROOT.EnableImplicitMT()
    
def set_proxy(dask_worker):
    """Configure the grid environment of a Dask worker.

    Sets ``X509_USER_PROXY`` to ``<worker local directory>/proxy``, sets the
    certificate directory and ``EXTRA_CLING_ARGS``, and tries to restrict the
    proxy file permissions to owner read-only.

    Args:
        dask_worker: worker object exposing a ``local_directory`` attribute.

    Returns:
        The value of ``X509_USER_PROXY`` after the update.
    """
    import os

    working_dir = dask_worker.local_directory
    print(working_dir)
    proxy_path = working_dir + '/proxy'
    os.environ['X509_USER_PROXY'] = proxy_path
    os.environ['X509_CERT_DIR'] = "/cvmfs/grid.cern.ch/etc/grid-security/certificates/"
    os.environ['EXTRA_CLING_ARGS'] = "-O2"
    try:
        os.chmod(proxy_path, 0o400)
    except OSError:
        # Best effort: the proxy may be missing or not owned by this process.
        # Only filesystem errors are ignored; anything else still propagates.
        pass

    return os.environ.get("X509_USER_PROXY")

Declare to the ROOT interpreter the header file which contains the custom-defined functions. This is done inside an initialization function, which is necessary in the case of distributed execution (it is executed at the beginning of each task).

In [2]:
%%time
# Read the C++ header once; the `with` block closes the file handle afterwards.
with open("postselection_UL.h", "r") as text_file:
    data = text_file.read()


def my_initialization_function():
    """Declare the contents of postselection_UL.h to the ROOT interpreter."""
    ROOT.gInterpreter.Declare(data)


if distributed:
    RDataFrame = ROOT.RDF.Experimental.Distributed.Dask.RDataFrame
    client = Client(address="tcp://127.0.0.1:" + str(sched_port))
    client.restart()
    try:
        client.register_plugin(UploadFile("/opt/workspace/persistent-storage/proxy"))
    except Exception as err:
        # Best effort, as before: keep going, but report the failure instead
        # of hiding it, since the workers rely on the uploaded proxy.
        print("Could not upload the proxy to the workers: {!r}".format(err))
    client.run(set_proxy)
    # Register the declaration so that it runs on the workers as well.
    ROOT.RDF.Experimental.Distributed.initialize(my_initialization_function)
else:
    RDataFrame = ROOT.RDataFrame
    my_initialization_function()

CPU times: user 358 ms, sys: 76.7 ms, total: 435 ms
Wall time: 9.98 s


Analysis cuts and definitions are conveniently grouped in functions for the sake of readability. This also includes the booking of  final histograms and the definition of variations.

In [3]:
def filter_trigger(df):
    """Return ``df`` restricted to events passing the leptonic trigger (UL2017 paths)."""
    trigger_expr = "HLT_IsoMu27 ||  HLT_Ele35_WPTight_Gsf"  # UL2017
    return df.Filter(trigger_expr, "Leptonic trigger")

def select_jets(df):
    """Apply the jet selection and define the leading/subleading VBS jet columns.

    The chain requires at least two jets, at least two good jets and two
    distinct VBS jet indices. It then defines ``leadjet_*`` and ``subleadjet_*``
    columns (pt, eta, phi, mass, DeepFlv_b) from the ``VBSJet_idx`` pair.

    Returns:
        The dataframe node after all filters and definitions.
    """
    node = df.Filter("nJet>=2", "At least two jets")
    node = node.Define("GoodJets_idx", "GoodJets(Jet_jetId, Jet_eta, Jet_pt, Jet_puId)")
    node = node.Filter("atleast2GoodJets(GoodJets_idx)", "At least two good jets")
    node = node.Define("VBSJet_idx", "SelectVBSJets_invmass(Jet_pt, Jet_eta, Jet_phi, Jet_mass, GoodJets_idx)")
    node = node.Filter("VBSJet_idx[0] != VBSJet_idx[1]", "2 VBS jets")

    # (column suffix, input jet branch)
    jet_branches = (
        ("pt", "Jet_pt"),
        ("eta", "Jet_eta"),
        ("phi", "Jet_phi"),
        ("mass", "Jet_mass"),
        ("DeepFlv_b", "Jet_btagDeepFlavB"),
    )
    # All leading-jet columns are defined first, then the subleading ones.
    for prefix, getter in (("leadjet", "GetLeading"), ("subleadjet", "GetSubLeading")):
        for suffix, branch in jet_branches:
            node = node.Define("{}_{}".format(prefix, suffix), "{}({}, VBSJet_idx)".format(getter, branch))

    return node

def select_lepton(df):
    """Select the lepton candidate and define the ``lepton_*`` columns.

    Defines ``Electron_idx`` and ``Muon_idx``, requires at least one selected
    lepton, determines ``GoodLeptonFamily`` and rejects events where it is -1.
    The kinematic and identification columns of the chosen lepton are defined
    afterwards.

    Returns:
        The dataframe node after all filters and definitions.
    """
    def pick(electron_branch, muon_branch):
        # Expression taking the value from the electron or muon branch.
        return "GetLepton({}, Electron_idx, {}, Muon_idx, GoodLeptonFamily)".format(electron_branch, muon_branch)

    node = df.Define("Electron_idx", "SelectElectron(Electron_pt, Electron_eta, Electron_phi, Electron_jetRelIso, Electron_mvaFall17V2Iso_WPL, Electron_mvaFall17V2Iso_WP90, Jet_eta, Jet_phi, VBSJet_idx, Year)")
    node = node.Define("Muon_idx", "SelectMuon(Muon_corrected_pt, Muon_eta, Muon_phi, Muon_tightId, Muon_looseId, Muon_pfRelIso04_all, Jet_eta, Jet_phi, VBSJet_idx)")
    node = node.Filter("Electron_idx[1] != -1 || Muon_idx[1] != -1", "At least 1 at-least-loose lepton")
    node = node.Define("GoodLeptonFamily", "DetermineGoodLepton(HLT_IsoMu27, HLT_Mu50, HLT_Ele35_WPTight_Gsf, HLT_Ele32_WPTight_Gsf_L1DoubleEG, HLT_Photon200, HLT_PFHT250, HLT_PFHT350, Electron_idx, Electron_pt, Electron_eta, Electron_mvaFall17V2Iso_WPL, Electron_jetRelIso, Muon_idx, Muon_corrected_pt, Muon_eta, Muon_pfRelIso04_all, Muon_looseId)")
    node = node.Filter("GoodLeptonFamily != -1 ", "Filter on leptons")

    # Order matters: later expressions reference earlier columns
    # (e.g. lepton_LnTRegion uses lepton_TightRegion).
    lepton_columns = [
        ("lepton_pt", pick("Electron_pt", "Muon_corrected_pt")),
        ("lepton_eta", pick("Electron_eta", "Muon_eta")),
        ("lepton_phi", pick("Electron_phi", "Muon_phi")),
        ("lepton_mass", pick("Electron_mass", "Muon_mass")),
        ("lepton_charge", pick("Electron_charge", "Muon_charge")),
        ("lepton_TightRegion", "GetLeptonTightFlag(Electron_idx, Muon_idx, GoodLeptonFamily)"),
        ("lepton_LnTRegion", "1 - lepton_TightRegion"),
        ("lepton_Zeppenfeld", "Zeppenfeld(lepton_eta, leadjet_eta, subleadjet_eta)"),
        ("lepton_pdgid", pick("Electron_pdgId", "Muon_pdgId")),
        ("lepton_pfRelIso04", "Get_isolation(Electron_jetRelIso, Electron_idx, Muon_pfRelIso04_all, Muon_idx, GoodLeptonFamily)"),
    ]
    for column, expression in lepton_columns:
        node = node.Define(column, expression)

    return node

def select_tau(df):
    """Select the tau candidate and define the ``tau_*`` columns.

    Defines ``Tau_idx``, keeps events where ``Tau_idx[1] != -1`` and then
    defines the kinematic, identification and isolation columns of that tau.

    Returns:
        The dataframe node after the filter and all definitions.
    """
    def from_tau(branch):
        # Expression reading `branch` for the selected tau.
        return "GetTau({}, Tau_idx)".format(branch)

    node = df.Define("Tau_idx", "SelectAndVetoTaus(Tau_pt, Tau_eta, Tau_phi, Tau_idDeepTau2017v2p1VSjet, Tau_idDeepTau2017v2p1VSe, Tau_idDeepTau2017v2p1VSmu, GoodLeptonFamily, Electron_idx, Electron_eta, Electron_phi, Muon_idx, Muon_eta, Muon_phi, Jet_eta, Jet_phi, VBSJet_idx, Year)")
    node = node.Filter("Tau_idx[1] != -1", "Exactly 1 at least loose Tau")

    # Order matters: tau_Zeppenfeld references tau_eta defined earlier.
    tau_columns = [
        ("tau_pt", from_tau("Tau_pt")),
        ("tau_eta", from_tau("Tau_eta")),
        ("tau_phi", from_tau("Tau_phi")),
        ("tau_mass", from_tau("Tau_mass")),
        ("tau_charge", from_tau("Tau_charge")),
        ("tau_DecayMode", from_tau("Tau_decayMode")),
        ("tau_isolation", from_tau("Tau_neutralIso")),
        ("tau_DeepTau_WP", "pow(GetTau(Tau_idDeepTau2017v2p1VSjet, Tau_idx)*1000., 2) + GetTau(Tau_idDeepTau2017v2p1VSmu, Tau_idx)*1000. + GetTau(Tau_idDeepTau2017v2p1VSe, Tau_idx)"),
        ("tau_DeepTauVsEle_WP", "GetLog2(GetTau(Tau_idDeepTau2017v2p1VSe, Tau_idx) + 1)"),
        ("tau_DeepTauVsMu_WP", "GetLog2(GetTau(Tau_idDeepTau2017v2p1VSmu, Tau_idx) + 1)"),
        ("tau_DeepTauVsJet_WP", "GetLog2(GetTau(Tau_idDeepTau2017v2p1VSjet, Tau_idx) + 1)"),
        ("tau_Zeppenfeld", "Zeppenfeld(tau_eta, leadjet_eta, subleadjet_eta)"),
        ("tau_TightRegion", "Tau_idx[1]"),
        ("tau_LnTRegion", "1 - Tau_idx[1]"),
        ("tau_jetIdx", from_tau("Tau_jetIdx")),
        ("tauleadTk_ptOverTau", from_tau("Tau_leadTkPtOverTauPt")),
        ("tau_DeepTauVsEle_raw", from_tau("Tau_rawDeepTau2017v2p1VSe")),
        ("tau_DeepTauVsMu_raw", from_tau("Tau_rawDeepTau2017v2p1VSmu")),
        ("tau_DeepTauVsJet_raw", from_tau("Tau_rawDeepTau2017v2p1VSjet")),
    ]
    for column, expression in tau_columns:
        node = node.Define(column, expression)

    return node

def scale_factors(df):
    """Define the lepton, tau and b-tagging scale-factor columns.

    The tau columns ``tau_<discriminator>_{Down,SF,Up}`` are read from the
    ``tauSF`` column as ``tauSF[row][col]``. The b-tagging columns are read
    from the ``btagSFs`` column by index.

    Returns:
        The dataframe node with all scale-factor columns defined.
    """
    node = df.Define("lepSF", "GetLeptonSF(Electron_effSF, Electron_idx, Muon_effSF, Muon_idx, GoodLeptonFamily, IsMC)")
    node = node.Define("lepUp", "abs(GetLeptonSF(Electron_effSF_errUp, Electron_idx, Muon_effSF_errUp, Muon_idx, GoodLeptonFamily, IsMC))")
    node = node.Define("lepDown", "abs(GetLeptonSF(Electron_effSF_errDown, Electron_idx, Muon_effSF_errDown, Muon_idx, GoodLeptonFamily, IsMC))")

    node = node.Define("tauSF", "getTauSF(tau_pt, tau_eta, tau_genPartFlav, IsMC, Year)")
    # Row index selects the discriminator, column index selects Down/SF/Up.
    for row, discriminator in enumerate(("vsjet", "vsele", "vsmu")):
        for col, tag in enumerate(("Down", "SF", "Up")):
            node = node.Define("tau_{}_{}".format(discriminator, tag), "tauSF[{}][{}]".format(row, col))

    node = node.Define("btagSFs", "btagcalc(GoodJets_idx, Jet_pt, Jet_eta, Jet_partonFlavour, Jet_btagDeepFlavB, Jet_btagSF_deepjet_M_up, Jet_btagSF_deepjet_M_down, Jet_btagSF_deepjet_M, Jet_btagDeepB, IsMC, Year)")
    btag_columns = ("btagSF", "btagUp", "btagDown", "mistagUp", "mistagDown")
    for position, column in enumerate(btag_columns):
        node = node.Define(column, "btagSFs[{}]".format(position))

    return node

def general_definitions(df):
    """Define event-level quantities, weight aliases and selection flags.

    Adds ``MET_pt`` and ``m_jj``, aliases for the prefire and pile-up weights
    and their variations, and the boolean ``pass_*`` columns.

    Returns:
        The dataframe node with all columns defined.
    """
    # Order matters: pass_MET_cut and pass_mjj_cut use MET_pt and m_jj.
    definitions = [
        ("MET_pt", "MET_T1Smear_pt_vec[0]"),
        ("m_jj", "GetInvMass(Jet_pt, Jet_eta, Jet_phi, Jet_mass, VBSJet_idx)"),
        ("PFSF", "PrefireWeight"),
        ("PFUp", "PrefireWeight_Up"),
        ("PFDown", "PrefireWeight_Down"),
        ("puSF", "puWeight"),
        ("puDown", "puWeightDown"),
        ("puUp", "puWeightUp"),
        ("pass_charge_selection", "SameCharge(GoodLeptonFamily, Electron_idx, Electron_charge, Muon_idx, Muon_charge, Tau_idx, Tau_charge)"),
        ("pass_b_veto", "BVeto(Jet_pt, Jet_eta, Jet_btagDeepFlavB, GoodJets_idx)"),
        ("pass_b_veto_loose", "BVeto_loose(Jet_pt, Jet_eta, Jet_btagDeepFlavB, GoodJets_idx)"),
        ("pass_MET_cut", "MET_pt > 50."),
        ("pass_mjj_cut", "m_jj > 500."),
    ]
    node = df
    for column, expression in definitions:
        node = node.Define(column, expression)

    return node

def book_histos(df, region, h, to_plot=()):
    """Book the histograms of one region for the requested sample groups.

    Results are stored in ``h`` as ``h[region][label][variable][channel]``,
    with ``channel`` being ``'etau'`` or ``'mutau'``. For the ``"Data"`` group
    two sets are booked: ``h[region]["Data"]`` (fake weight equal to 0) and
    ``h[region]["Fake"]`` (fake weight above 0, histogram weighted by it).
    Every other group is booked with the tight-lepton, tight-tau and prompt
    filters and weighted by the ``wzero`` column, which is not defined here.

    Relies on the module-level ``aggregated_samples``, ``sampleDict``,
    ``variables`` and ``variables_to_plot``.

    Args:
        df: dataframe node to book the histograms on.
        region: key into ``variables``; also used in the histogram names.
        h: dictionary updated in place; ``h[region]`` is (re)initialised.
        to_plot: labels of ``aggregated_samples`` to book. The default is an
            immutable empty tuple rather than a shared mutable list.
    """
    def hist_model(var):
        # Histogram model tuple (name, title, nbins, bins) passed to Histo1D.
        return ("h_" + var._name + "_" + region, var._title, var._nbins, var._bins)

    # (channel key, GoodLeptonFamily value, fake-weight column)
    data_channels = (
        ("etau", 0, "event_SFFake_vsjet4"),
        ("mutau", 1, "event_SFFake_vsjet2"),
    )
    # (channel key, family cut, family label, tight-lepton label, prompt label)
    # Labels are kept exactly as they were so cut-flow reports do not change.
    mc_channels = (
        ("etau", "GoodLeptonFamily==0", "electron!", "tight electron!", "prompt electron and tau!"),
        ("mutau", "GoodLeptonFamily==1", "muon!", "tight_muon!", "prompt muon and tau!"),
    )
    prompt_cut = "(lepton_genPartFlav==1|| lepton_genPartFlav==15) && tau_genPartFlav==5"

    h[region] = {label: {} for label in aggregated_samples}
    h[region]["Fake"] = {}

    for label, samples in aggregated_samples.items():
        if label not in to_plot:
            continue

        filter_string = " || ".join("Sample == {}".format(sampleDict[s.name]) for s in samples)
        selected = [v for v in variables[region] if v._name in variables_to_plot]
        if not selected:
            continue

        # The sample filter does not depend on the variable, so one node is
        # shared by all variables instead of creating one per variable.
        df_sample = df.Filter(filter_string)

        for v in selected:
            if label == "Data":
                h[region]["Data"][v._name] = {}
                h[region]["Fake"][v._name] = {}
                for channel, family, fake_weight in data_channels:
                    df_channel = df_sample.Filter("GoodLeptonFamily=={}".format(family))
                    h[region]["Data"][v._name][channel] = (
                        df_channel
                        .Filter(fake_weight + " == 0.", "Both lepton and tau are prompt")
                        .Histo1D(hist_model(v), v._name)
                    )
                    h[region]["Fake"][v._name][channel] = (
                        df_channel
                        .Filter(fake_weight + " > 0.", "At least one among lepton and tau is fake")
                        .Histo1D(hist_model(v), v._name, fake_weight)
                    )
            else:
                h[region][label][v._name] = {}
                for channel, family_cut, family_tag, tight_tag, prompt_tag in mc_channels:
                    h[region][label][v._name][channel] = (
                        df_sample
                        .Filter(family_cut, family_tag)
                        .Filter("lepton_TightRegion==1", tight_tag)
                        .Filter("tau_TightRegion==1", "tight tau!")
                        .Filter(prompt_cut, prompt_tag)
                        .Histo1D(hist_model(v), v._name, "wzero")
                    )
                
def produce_tau_SF(df):
    """Define the FES and TES columns and their Down/SF/Up components.

    For each of ``FES`` and ``TES`` the ``<prefix>s`` column is defined first.
    ``<prefix>Down``, ``<prefix>SF`` and ``<prefix>Up`` are then read from it
    with ``getFlattenedMatrixColumn(<prefix>s, 3, i)`` for i = 0, 1, 2.

    Returns:
        The dataframe node with the eight columns defined.
    """
    sources = (
        ("FES", "getFES(Tau_eta, Tau_decayMode, Tau_genPartFlav, IsMC, Year)"),
        ("TES", "getTES(Tau_pt, Tau_decayMode,Tau_genPartFlav, IsMC, Year)"),
    )
    node = df
    for prefix, expression in sources:
        node = node.Define(prefix + "s", expression)
        for col, tag in enumerate(("Down", "SF", "Up")):
            node = node.Define(prefix + tag, "getFlattenedMatrixColumn({}s, 3, {})".format(prefix, col))
    return node
                
def jet_tau_redefines(df):
    """Redefine ``Jet_pt`` and ``Jet_mass`` as their ``*_nom`` counterparts."""
    with_nominal_pt = df.Redefine("Jet_pt", "Jet_pt_nom")
    return with_nominal_pt.Redefine("Jet_mass", "Jet_mass_nom")


def energetic_variations(df):
    """Register the down/up variations of the energy-related columns.

    ``TESSF`` and ``FESSF`` are each varied on their own. The ``jer`` and
    ``jesTotal`` variations each act on ``Jet_pt``, ``Jet_mass``,
    ``MET_T1Smear_pt_vec`` and ``MET_T1Smear_phi_vec`` together.

    Returns:
        The dataframe node with the four variations registered.
    """
    node = df
    for scale in ("TES", "FES"):
        node = node.Vary(
            scale + "SF",
            "RVec<RVec<float>>{%sDown, %sUp}" % (scale, scale),
            variationTags=["down", "up"],
            variationName=scale,
        )

    # One {down, up} pair per varied column, in the same order as the columns.
    jet_met_template = (
        "RVec<RVec<RVec<float>>>{"
        "{Jet_pt_%(s)sDown, Jet_pt_%(s)sUp}, "
        "{Jet_mass_%(s)sDown, Jet_mass_%(s)sUp}, "
        "{MET_T1Smear_pt_%(s)sDown_vec, MET_T1Smear_pt_%(s)sUp_vec}, "
        "{MET_T1Smear_phi_%(s)sDown_vec, MET_T1Smear_phi_%(s)sUp_vec}}"
    )
    for systematic in ("jer", "jesTotal"):
        node = node.Vary(
            ["Jet_pt", "Jet_mass", "MET_T1Smear_pt_vec", "MET_T1Smear_phi_vec"],
            jet_met_template % {"s": systematic},
            variationTags=["down", "up"],
            variationName=systematic,
        )

    return node

def SF_variations(df):
    """Register the weight (scale-factor) systematic variations on the dataframe.

    Each entry varies one nominal weight column using its "...Down" / "...Up"
    companion columns, with tags "down" and "up".

    Returns the dataframe node with all variations attached.
    """
    # (nominal column, stem of the Down/Up column names, variation name)
    weight_variations = (
        ("PrefireWeight", "PrefireWeight_", "PF"),
        ("puWeight", "puWeight", "pu"),
        ("btagSF", "btag", "btag"),
        ("lepSF", "lep", "lep"),
        ("tau_vsjet_SF", "tau_vsjet_", "tau_vsjet"),
        ("tau_vsele_SF", "tau_vsele_", "tau_vsele"),
        ("tau_vsmu_SF", "tau_vsmu_", "tau_vsmu"),
        ("pdf_totalSF", "pdf_total", "pdf_total"),
        ("QCDScaleSF", "QCDScale", "QCDScale"),
        ("ISRSF", "ISR", "ISR"),
        ("FSRSF", "FSR", "FSR"),
    )

    node = df
    for nominal, stem, label in weight_variations:
        expression = "RVec<float>{" + stem + "Down, " + stem + "Up}"
        node = node.Vary(nominal, expression, variationTags=["down", "up"], variationName=label)
    return node

Define the samples to be analyzed. This makes use of the auxiliary sample class defined in samplesUL.py.

In [4]:
sampleDict = {'ZZtoLep_UL2016APV': 0,'ZZTo2L2Nu_UL2016APV': 1,'ZZTo4L_UL2016APV': 2,'GluGluToContinToZZTo4e_UL2016APV': 3,'GluGluToContinToZZTo2e2mu_UL2016APV': 4,'GluGluToContinToZZTo2e2tau_UL2016APV': 5,'GluGluToContinToZZTo2mu2nu_UL2016APV': 6,'GluGluToContinToZZTo4mu_UL2016APV': 7,'GluGluToContinToZZTo2mu2tau_UL2016APV': 8,'GluGluToContinToZZTo2tau2nu_UL2016APV': 9,'GluGluToContinToZZTo4tau_UL2016APV': 10,'GluGluToContinToZZTo2e2nu_UL2016APV': 11,'TT_UL2016APV': 12,'TT_SemiLep_UL2016APV': 13,'TT_Had_UL2016APV': 14,'TTTo2L2Nu_UL2016APV': 15,'TT_beff_UL2016APV': 16,'TVX_UL2016APV': 17,'TTGJets_UL2016APV': 18,'TTZToQQ_UL2016APV': 19,'TTZToLLNuNu_UL2016APV': 20,'TTWJetsToQQ_UL2016APV': 21,'TTWJetsToLNu_UL2016APV': 22,'tZq_ll_4f_UL2016APV': 23,'VG_UL2016APV': 24,'ZG_UL2016APV': 25,'WG_UL2016APV': 26,'WrongSign_UL2016APV': 27,'WWto2L2Nu_UL2016APV': 28,'GluGluToWWToENEN_UL2016APV': 29,'GluGluToWWToENMN_UL2016APV': 30,'GluGluToWWToENTN_UL2016APV': 31,'GluGluToWWToMNEN_UL2016APV': 32,'GluGluToWWToMNMN_UL2016APV': 33,'GluGluToWWToMNTN_UL2016APV': 34,'GluGluToWWToTNEN_UL2016APV': 35,'GluGluToWWToTNMN_UL2016APV': 36,'GluGluToWWToTNTN_UL2016APV': 37,'ST_tW_top_UL2016APV': 38,'ST_tW_antitop_UL2016APV': 39,'GluGluHToWWTo2L2Nu_UL2016APV': 40,'GluGluHToWWToLNuQQ_UL2016APV': 41,'GluGluHToZZTo4L_UL2016APV': 42,'GluGluHToTauTau_UL2016APV': 43,'VBFHToWWTo2L2Nu_UL2016APV': 44,'VBFHToTauTau_UL2016APV': 45,'ttHToNonbb_UL2016APV': 46,'VHToNonbb_UL2016APV': 47,'Triboson_UL2016APV': 48,'WWTo2L2Nu_DoubleScattering_UL2016': 49,'WWW_4F_UL2016APV': 50,'WWZ_4F_UL2016APV': 51,'WZZ_UL2016APV': 52,'ZZZ_UL2016APV': 53,'WWG_UL2016APV': 54,'WJets_UL2016APV': 55,'WJetsHT70to100_UL2016APV': 56,'WJetsHT100to200_UL2016APV': 57,'WJetsHT200to400_UL2016APV': 58,'WJetsHT400to600_UL2016APV': 59,'WJetsHT600to800_UL2016APV': 60,'WJetsHT800to1200_UL2016APV': 61,'WJetsHT1200to2500_UL2016APV': 62,'WJetsHT2500toInf_UL2016APV': 63,'WZ_UL2016APV': 64,'DYJetsToLL_UL2016APV': 65,'DYJetsToLL_FxFx_UL2016APV': 
65,'DYJetsToLL_M10to50_UL2016APV': 66,'DYJetsToLL_M50_UL2016APV': 67,'DYJetsToLL_M50_FxFx_UL2016APV': 67,'DYJetsToLL_M50_UL2016APV_ext': 67,'WpWpJJ_EWK_UL2016APV': 68,'WpWpJJ_QCD_UL2016APV': 69,'VBS_SSWW_SM_UL2016APV': 70,'VBS_SSWW_LL_SM_UL2016APV': 71,'VBS_SSWW_TL_SM_UL2016APV': 72,'VBS_SSWW_TT_SM_UL2016APV': 73,'VBS_SSWW_cW_UL2016APV': 74,'VBS_SSWW_cW_SM_UL2016APV': 75,'VBS_SSWW_cW_BSM_UL2016APV': 76,'VBS_SSWW_cW_INT_UL2016APV': 77,'VBS_SSWW_cHW_UL2016APV': 78,'VBS_SSWW_cHW_SM_UL2016APV': 79,'VBS_SSWW_cHW_BSM_UL2016APV': 80,'VBS_SSWW_cHW_INT_UL2016APV': 81,'VBS_SSWW_cW_cHW_UL2016APV': 82,'VBS_SSWW_DIM6_UL2016APV': 83,'VBS_SSWW_DIM6_SM_UL2016APV': 84,'ZZtoLep_UL2016': 85,'ZZTo2L2Nu_UL2016': 86,'ZZTo4L_UL2016': 87,'GluGluToContinToZZTo4e_UL2016': 88,'GluGluToContinToZZTo2e2mu_UL2016': 89,'GluGluToContinToZZTo2e2tau_UL2016': 90,'GluGluToContinToZZTo2mu2nu_UL2016': 91,'GluGluToContinToZZTo4mu_UL2016': 92,'GluGluToContinToZZTo2mu2tau_UL2016': 93,'GluGluToContinToZZTo2tau2nu_UL2016': 94,'GluGluToContinToZZTo4tau_UL2016': 95,'GluGluToContinToZZTo2e2nu_UL2016': 96,'TT_UL2016': 97,'TT_SemiLep_UL2016': 98,'TT_Had_UL2016': 99,'TTTo2L2Nu_UL2016': 100,'TT_beff_UL2016': 101,'TVX_UL2016': 102,'TTGJets_UL2016': 103,'TTZToQQ_UL2016': 104,'TTZToLLNuNu_UL2016': 105,'TTWJetsToQQ_UL2016': 106,'TTWJetsToLNu_UL2016': 107,'tZq_ll_4f_UL2016': 108,'VG_UL2016': 109,'ZG_UL2016': 110,'WG_UL2016': 111,'WrongSign_UL2016': 112,'WWto2L2Nu_UL2016': 113,'GluGluToWWToENEN_UL2016': 114,'GluGluToWWToENMN_UL2016': 115,'GluGluToWWToENTN_UL2016': 116,'GluGluToWWToMNEN_UL2016': 117,'GluGluToWWToMNMN_UL2016': 118,'GluGluToWWToMNTN_UL2016': 119,'GluGluToWWToTNEN_UL2016': 120,'GluGluToWWToTNMN_UL2016': 121,'GluGluToWWToTNTN_UL2016': 122,'ST_tW_top_UL2016': 123,'ST_tW_antitop_UL2016': 124,'GluGluHToWWTo2L2Nu_UL2016': 125,'GluGluHToWWToLNuQQ_UL2016': 126,'GluGluHToZZTo4L_UL2016': 127,'GluGluHToTauTau_UL2016': 128,'VBFHToWWTo2L2Nu_UL2016': 129,'VBFHToTauTau_UL2016': 130,'ttHToNonbb_UL2016': 
131,'VHToNonbb_UL2016': 132,'Triboson_UL2016': 133,'WWW_4F_UL2016': 134,'WWZ_4F_UL2016': 135,'WZZ_UL2016': 136,'ZZZ_UL2016': 137,'WWG_UL2016': 138,'WJets_UL2016': 139,'WJetsHT70to100_UL2016': 140,'WJetsHT100to200_UL2016': 141,'WJetsHT200to400_UL2016': 142,'WJetsHT400to600_UL2016': 143,'WJetsHT600to800_UL2016': 144,'WJetsHT800to1200_UL2016': 145,'WJetsHT1200to2500_UL2016': 146,'WJetsHT2500toInf_UL2016': 147,'WZ_UL2016': 148,'DYJetsToLL_UL2016': 149,'DYJetsToLL_FxFx_UL2016': 149,'DYJetsToLL_M10to50_UL2016': 150,'DYJetsToLL_M50_UL2016': 151,'DYJetsToLL_M50_FxFx_UL2016': 151,'DYJetsToLL_M50_UL2016_ext': 151,'WpWpJJ_EWK_UL2016': 152,'WpWpJJ_QCD_UL2016': 153,'VBS_SSWW_SM_UL2016': 154,'VBS_SSWW_LL_SM_UL2016': 155,'VBS_SSWW_TL_SM_UL2016': 156,'VBS_SSWW_TT_SM_UL2016': 157,'VBS_SSWW_cW_UL2016': 158,'VBS_SSWW_cW_SM_UL2016': 159,'VBS_SSWW_cW_BSM_UL2016': 160,'VBS_SSWW_cW_INT_UL2016': 161,'VBS_SSWW_cHW_UL2016': 162,'VBS_SSWW_cHW_SM_UL2016': 163,'VBS_SSWW_cHW_BSM_UL2016': 164,'VBS_SSWW_cHW_INT_UL2016': 165,'VBS_SSWW_cW_cHW_UL2016': 166,'VBS_SSWW_DIM6_UL2016': 167,'VBS_SSWW_DIM6_SM_UL2016': 168,'ZZtoLep_UL2017': 169,'ZZTo2L2Nu_UL2017': 170,'ZZTo4L_UL2017': 171,'GluGluToContinToZZTo4e_UL2017': 172,'GluGluToContinToZZTo2e2mu_UL2017': 173,'GluGluToContinToZZTo2e2tau_UL2017': 174,'GluGluToContinToZZTo2mu2nu_UL2017': 175,'GluGluToContinToZZTo4mu_UL2017': 176,'GluGluToContinToZZTo2mu2tau_UL2017': 177,'GluGluToContinToZZTo2tau2nu_UL2017': 178,'GluGluToContinToZZTo4tau_UL2017': 179,'GluGluToContinToZZTo2e2nu_UL2017': 180,'TT_UL2017': 181,'TT_SemiLep_UL2017': 182,'TT_Had_UL2017': 183,'TTTo2L2Nu_UL2017': 184,'TT_beff_UL2017': 185,'TVX_UL2017': 186,'TTGJets_UL2017': 187,'TTZToQQ_UL2017': 188,'TTZToLLNuNu_UL2017': 189,'TTWJetsToQQ_UL2017': 190,'TTWJetsToLNu_UL2017': 191,'tZq_ll_4f_UL2017': 192,'VG_UL2017': 193,'ZG_UL2017': 194,'WG_UL2017': 195,'WrongSign_UL2017': 196,'WWto2L2Nu_UL2017': 197,'GluGluToWWToENEN_UL2017': 198,'GluGluToWWToENMN_UL2017': 199,'GluGluToWWToENTN_UL2017': 
200,'GluGluToWWToMNEN_UL2017': 201,'GluGluToWWToMNMN_UL2017': 202,'GluGluToWWToMNTN_UL2017': 203,'GluGluToWWToTNEN_UL2017': 204,'GluGluToWWToTNMN_UL2017': 205,'GluGluToWWToTNTN_UL2017': 206,'ST_tW_top_UL2017': 207,'ST_tW_antitop_UL2017': 208,'GluGluHToWWTo2L2Nu_UL2017': 209,'GluGluHToWWToLNuQQ_UL2017': 210,'GluGluHToZZTo4L_UL2017': 211,'GluGluHToTauTau_UL2017': 212,'VBFHToWWTo2L2Nu_UL2017': 213,'VBFHToTauTau_UL2017': 214,'ttHToNonbb_UL2017': 215,'VHToNonbb_UL2017': 216,'Triboson_UL2017': 217,'WWTo2L2Nu_DoubleScattering_UL2017': 218,'WWW_4F_UL2017': 219,'WWZ_4F_UL2017': 220,'WZZ_UL2017': 221,'ZZZ_UL2017': 222,'WWG_UL2017': 223,'WJets_UL2017': 224,'WJetsHT70to100_UL2017': 225,'WJetsHT100to200_UL2017': 226,'WJetsHT200to400_UL2017': 227,'WJetsHT400to600_UL2017': 228,'WJetsHT600to800_UL2017': 229,'WJetsHT800to1200_UL2017': 230,'WJetsHT1200to2500_UL2017': 231,'WJetsHT2500toInf_UL2017': 232,'WZ_UL2017': 233,'DYJetsToLL_UL2017': 234,'DYJetsToLL_FxFx_UL2017': 234,'DYJetsToLL_M10to50_UL2017': 235,'DYJetsToLL_M50_UL2017': 236,'DYJetsToLL_M50_FxFx_UL2017': 236,'DYJetsToLL_M50_UL2017_ext': 236,'WpWpJJ_EWK_UL2017': 237,'WpWpJJ_QCD_UL2017': 238,'VBS_SSWW_SM_UL2017': 239,'VBS_SSWW_LL_SM_UL2017': 240,'VBS_SSWW_TL_SM_UL2017': 241,'VBS_SSWW_TT_SM_UL2017': 242,'VBS_SSWW_cW_UL2017': 243,'VBS_SSWW_cW_SM_UL2017': 244,'VBS_SSWW_cW_BSM_UL2017': 245,'VBS_SSWW_cW_INT_UL2017': 246,'VBS_SSWW_cHW_UL2017': 247,'VBS_SSWW_cHW_SM_UL2017': 248,'VBS_SSWW_cHW_BSM_UL2017': 249,'VBS_SSWW_cHW_INT_UL2017': 250,'VBS_SSWW_cW_cHW_UL2017': 251,'VBS_SSWW_DIM6_UL2017': 252,'VBS_SSWW_DIM6_SM_UL2017': 253,'ZZtoLep_UL2018': 254,'ZZTo2L2Nu_UL2018': 255,'ZZTo4L_UL2018': 256,'GluGluToContinToZZTo4e_UL2018': 257,'GluGluToContinToZZTo2e2mu_UL2018': 258,'GluGluToContinToZZTo2e2tau_UL2018': 259,'GluGluToContinToZZTo2mu2nu_UL2018': 260,'GluGluToContinToZZTo4mu_UL2018': 261,'GluGluToContinToZZTo2mu2tau_UL2018': 262,'GluGluToContinToZZTo2tau2nu_UL2018': 263,'GluGluToContinToZZTo4tau_UL2018': 
264,'GluGluToContinToZZTo2e2nu_UL2018': 265,'TT_UL2018': 266,'TT_SemiLep_UL2018': 267,'TT_Had_UL2018': 268,'TTTo2L2Nu_UL2018': 269,'TT_beff_UL2018': 270,'TVX_UL2018': 271,'TTGJets_UL2018': 272,'TTZToQQ_UL2018': 273,'TTZToLLNuNu_UL2018': 274,'TTWJetsToQQ_UL2018': 275,'TTWJetsToLNu_UL2018': 276,'tZq_ll_4f_UL2018': 277,'VG_UL2018': 278,'ZG_UL2018': 279,'WG_UL2018': 280,'WrongSign_UL2018': 281,'WWto2L2Nu_UL2018': 282,'GluGluToWWToENEN_UL2018': 283,'GluGluToWWToENMN_UL2018': 284,'GluGluToWWToENTN_UL2018': 285,'GluGluToWWToMNEN_UL2018': 286,'GluGluToWWToMNMN_UL2018': 287,'GluGluToWWToMNTN_UL2018': 288,'GluGluToWWToTNEN_UL2018': 289,'GluGluToWWToTNMN_UL2018': 290,'GluGluToWWToTNTN_UL2018': 291,'ST_tW_top_UL2018': 292,'ST_tW_antitop_UL2018': 293,'GluGluHToWWTo2L2Nu_UL2018': 294,'GluGluHToWWToLNuQQ_UL2018': 295,'GluGluHToZZTo4L_UL2018': 296,'GluGluHToTauTau_UL2018': 297,'VBFHToWWTo2L2Nu_UL2018': 298,'VBFHToTauTau_UL2018': 299,'ttHToNonbb_UL2018': 300,'VHToNonbb_UL2018': 301,'Triboson_UL2018': 302,'WWTo2L2Nu_DoubleScattering_UL2018': 303,'WWW_4F_UL2018': 304,'WWZ_4F_UL2018': 305,'WZZ_UL2018': 306,'ZZZ_UL2018': 307,'WWG_UL2018': 308,'WJets_UL2018': 309,'WJetsHT70to100_UL2018': 310,'WJetsHT100to200_UL2018': 311,'WJetsHT200to400_UL2018': 312,'WJetsHT400to600_UL2018': 313,'WJetsHT600to800_UL2018': 314,'WJetsHT800to1200_UL2018': 315,'WJetsHT1200to2500_UL2018': 316,'WJetsHT2500toInf_UL2018': 317,'WZ_UL2018': 318,'DYJetsToLL_UL2018': 319,'DYJetsToLL_FxFx_UL2018': 319,'DYJetsToLL_M10to50_UL2018': 320,'DYJetsToLL_M50_UL2018': 321,'DYJetsToLL_M50_FxFx_UL2018': 321,'DYJetsToLL_M50_UL2018_ext': 321,'WpWpJJ_EWK_UL2018': 322,'WpWpJJ_QCD_UL2018': 323,'VBS_SSWW_SM_UL2018': 324,'VBS_SSWW_LL_SM_UL2018': 325,'VBS_SSWW_TL_SM_UL2018': 326,'VBS_SSWW_TT_SM_UL2018': 327,'VBS_SSWW_cW_UL2018': 328,'VBS_SSWW_cW_BSM_UL2018': 329,'VBS_SSWW_cW_SM_UL2018': 330,'VBS_SSWW_cW_INT_UL2018': 331,'VBS_SSWW_cHW_UL2018': 332,'VBS_SSWW_cHW_SM_UL2018': 333,'VBS_SSWW_cHW_BSM_UL2018': 334,'VBS_SSWW_cHW_INT_UL2018': 
335,'VBS_SSWW_cW_cHW_UL2018': 336,'VBS_SSWW_DIM6_UL2018': 337,'VBS_SSWW_DIM6_SM_UL2018': 338,'DataMu_UL2016APV': 339,'DataMuB1_UL2016APV': 340,'DataMuB2_UL2016APV': 341,'DataMuC_UL2016APV': 342,'DataMuD_UL2016APV': 343,'DataMuE_UL2016APV': 344,'DataMuF_UL2016APV': 345,'DataMu_UL2016': 346,'DataMuF_UL2016': 347,'DataMuG_UL2016': 348,'DataMuH_UL2016': 349,'DataMu_UL2017': 350,'DataMuB_UL2017': 351,'DataMuC_UL2017': 352,'DataMuD_UL2017': 353,'DataMuE_UL2017': 354,'DataMuF_UL2017': 355,'DataMu_UL2018': 356,'DataMuA_UL2018': 357,'DataMuB_UL2018': 358,'DataMuC_UL2018': 359,'DataMuD_UL2018': 360,'DataEle_UL2016APV': 361,'DataEleB1_UL2016APV': 362,'DataEleB2_UL2016APV': 363,'DataEleC_UL2016APV': 364,'DataEleD_UL2016APV': 365,'DataEleE_UL2016APV': 366,'DataEleF_UL2016APV': 367,'DataEle_UL2016': 368,'DataEleF_UL2016': 369,'DataEleG_UL2016': 370,'DataEleH_UL2016': 371,'DataEle_UL2017': 372,'DataEleB_UL2017': 373,'DataEleC_UL2017': 374,'DataEleD_UL2017': 375,'DataEleE_UL2017': 376,'DataEleF_UL2017': 377,'DataEle_UL2018': 378,'DataEleA_UL2018': 379,'DataEleB_UL2018': 380,'DataEleC_UL2018': 381,'DataEleD_UL2018': 382,'DataHT_UL2016APV': 383,'DataHTB1_UL2016APV': 384,'DataHTB2_UL2016APV': 385,'DataHTC_UL2016APV': 386,'DataHTD_UL2016APV': 387,'DataHTE_UL2016APV': 388,'DataHTF_UL2016APV': 389,'DataHT_UL2016': 390,'DataHTF_UL2016': 391,'DataHTG_UL2016': 392,'DataHTH_UL2016': 393,'DataHT_UL2017': 394,'DataHTB_UL2017': 395,'DataHTC_UL2017': 396,'DataHTD_UL2017': 397,'DataHTE_UL2017': 398,'DataHTF_UL2017': 399,'DataHT_UL2018': 400,'DataHTA_UL2018': 401,'DataHTB_UL2018': 402,'DataHTC_UL2018': 403,'DataHTD_UL2018': 404,'SampleHTFake_UL2016APV': 405,'SampleHTFake_UL2016': 406,'SampleHTFake_UL2017': 407,'SampleHTFake_UL2018': 408,}

# Physics-process groups for UL2017: each key is the name of an aggregated
# sample, each value the list of individual sample objects merged under it.
# Key order matters: it sets the iteration order when selecting samples to plot.
aggregated_samples_UL2017 = {
    'VG': [
        ZG_UL2017,
        WG_UL2017,
    ],
    'TVX': [
        TTGJets_UL2017,
        TTZToQQ_UL2017,
        TTZToLLNuNu_UL2017,
        TTWJetsToQQ_UL2017,
        TTWJetsToLNu_UL2017,
        tZq_ll_4f_UL2017,
    ],
    'Triboson': [
        WWW_4F_UL2017,
        WWZ_4F_UL2017,
        WZZ_UL2017,
        ZZZ_UL2017,
        WWG_UL2017,
    ],
    'TTTo2L2Nu': [
        TTTo2L2Nu_UL2017,
    ],
    'WZ': [
        WZ_UL2017,
    ],
    'DYJetsToLL_FxFx': [
        DYJetsToLL_M50_FxFx_UL2017,
    ],
    'WrongSign': [
        WWto2L2Nu_UL2017,
        GluGluToWWToENEN_UL2017,
        GluGluToWWToENMN_UL2017,
        GluGluToWWToENTN_UL2017,
        GluGluToWWToMNEN_UL2017,
        GluGluToWWToMNMN_UL2017,
        GluGluToWWToMNTN_UL2017,
        GluGluToWWToTNEN_UL2017,
        GluGluToWWToTNMN_UL2017,
        GluGluToWWToTNTN_UL2017,
        ST_tW_top_UL2017,
        ST_tW_antitop_UL2017,
        GluGluHToWWTo2L2Nu_UL2017,
        GluGluHToZZTo4L_UL2017,
        GluGluHToTauTau_UL2017,
        VBFHToWWTo2L2Nu_UL2017,
        VBFHToTauTau_UL2017,
        ttHToNonbb_UL2017,
        VHToNonbb_UL2017,
    ],
    'ZZtoLep': [
        ZZTo2L2Nu_UL2017,
        ZZTo4L_UL2017,
        GluGluToContinToZZTo2e2nu_UL2017,
        GluGluToContinToZZTo2e2mu_UL2017,
        GluGluToContinToZZTo2e2tau_UL2017,
        GluGluToContinToZZTo2mu2nu_UL2017,
        GluGluToContinToZZTo2mu2tau_UL2017,
        GluGluToContinToZZTo4e_UL2017,
        GluGluToContinToZZTo4mu_UL2017,
        GluGluToContinToZZTo4tau_UL2017,
    ],
    'VBS_SSWW_SM': [
        VBS_SSWW_LL_SM_UL2017,
        VBS_SSWW_TL_SM_UL2017,
        VBS_SSWW_TT_SM_UL2017,
    ],
}

# The rest of the notebook works on the generic name; only UL2017 is configured here.
aggregated_samples = aggregated_samples_UL2017

Create the list of input files

In [5]:
# Number of preselection output files (preselectionUL_0.root ... preselectionUL_275.root).
ntasks_e2e = 276
if e2e == True:
    # Alternative WebDAV endpoint, kept for reference:
    #url_e2e = "davs://t2-xrdcms.lnl.infn.it:2880/pnfs/lnl.infn.it/data/cms/store/user/ttedesch/"
    url_e2e = "root://t2-xrdcms.lnl.infn.it:7070//store/user/ttedesch/"
    # One remote URL per preselection task, in task order.
    chain = [
        url_e2e + folder_e2e + "/preselectionUL_{}.root".format(task_index)
        for task_index in range(ntasks_e2e)
    ]

Build the computational graph. Results are grouped in a single dictionary.

In [6]:
# Container for the booked histograms; it is passed to book_histos() further
# down and later iterated as h[region][sample][variable][final_state].
h = {}
# NOTE(review): `chain` is only assigned when e2e == True in the cell that
# builds the input file list; with e2e False this cell raises NameError.
if distributed == True:
    # Distributed execution: the dataset is split into `nmaxpartitions`
    # partitions and processed through the Dask client.
    # `RDataFrame` and `client` are not defined in this cell - presumably set
    # up in the execution-configuration cells; confirm.
    # The original note on this line mentions the ROOT build with monitoring
    # features
    # (/cvmfs/images.dodas.infn.it/registry.hub.docker.com/dodasts/root-in-docker:ubuntu22-kernel-v1-monitoring).
    df = RDataFrame("Events", chain, npartitions=nmaxpartitions, daskclient=client)
else:
    # Local execution: only the first input file is read.
    # To run on all files use: df = RDataFrame("Events", chain)
    df = RDataFrame("Events", chain[0])
    
# Tag each event with IsMC (from the C++ helper isMC(Sample)) and with a
# constant Year string; both are consumed by the helpers called further down.
df_type = df.Define("IsMC", "isMC(Sample)").Define("Year", "\"UL2017\"")
# Add the FES/TES down/nominal/up columns.
df_tau_SF = produce_tau_SF(df_type)
# Replace Jet_pt/Jet_mass with their "_nom" versions, then wrap every scalar
# MET_T1Smear pt/phi value (nominal, jer and jesTotal up/down) into a
# one-element RVec<float> "<name>_vec" column. energetic_variations() varies
# the "_vec" columns together with the jet columns in a single Vary call.
df_jet_tau_redefines = jet_tau_redefines(df_tau_SF).Define("MET_T1Smear_pt_vec", "RVec<float>{ (float) MET_T1Smear_pt}").Define("MET_T1Smear_phi_vec", "RVec<float>{ (float) MET_T1Smear_phi}")\
                                                   .Define("MET_T1Smear_pt_jerDown_vec", "RVec<float>{ (float) MET_T1Smear_pt_jerDown}").Define("MET_T1Smear_phi_jerDown_vec", "RVec<float>{ (float) MET_T1Smear_phi_jerDown}")\
                                                   .Define("MET_T1Smear_pt_jerUp_vec", "RVec<float>{ (float) MET_T1Smear_pt_jerUp}").Define("MET_T1Smear_phi_jerUp_vec", "RVec<float>{ (float) MET_T1Smear_phi_jerUp}")\
                                                   .Define("MET_T1Smear_pt_jesTotalDown_vec", "RVec<float>{ (float) MET_T1Smear_pt_jesTotalDown}").Define("MET_T1Smear_phi_jesTotalDown_vec", "RVec<float>{ (float) MET_T1Smear_phi_jesTotalDown}")\
                                                   .Define("MET_T1Smear_pt_jesTotalUp_vec", "RVec<float>{ (float) MET_T1Smear_pt_jesTotalUp}").Define("MET_T1Smear_phi_jesTotalUp_vec", "RVec<float>{ (float) MET_T1Smear_phi_jesTotalUp}")

# Declare the TES, FES, jer and jesTotal variations before any selection, so
# every Filter/Define downstream is evaluated once per variation.
df_sys = energetic_variations(df_jet_tau_redefines)


# Trigger and VBS-jet selections, then the e/mu + tau final state.
# filter_trigger, select_jets, select_lepton, select_tau and
# general_definitions are not defined in this notebook chunk - presumably they
# come from the star-imported helper modules; confirm.
df_trigger = filter_trigger(df_sys)
df_jets = select_jets(df_trigger)
# After the lepton selection: keep only events passing DataLeptonCheck, then
# rescale Tau_pt and Tau_mass by the product FESSF*TESSF (both of which carry
# the TES/FES variations declared earlier).
df_lepton = select_lepton(df_jets).Define("PassDataLeptonCheck", "DataLeptonCheck(Sample, GoodLeptonFamily, IsMC)").Filter("PassDataLeptonCheck == true", "DataLeptonCheck")\
                                  .Redefine("Tau_pt", "Tau_pt*FESSF*TESSF")\
                                  .Redefine("Tau_mass", "Tau_mass*FESSF*TESSF")\

# Generator-level flavour of the selected lepton and tau; the *_isPrompt
# columns are plain copies of the corresponding *_genPartFlav columns.
df_tau = select_tau(df_lepton).Define("lepton_genPartFlav", "GetLepton(Electron_genPartFlav, Electron_idx, Muon_genPartFlav, Muon_idx, GoodLeptonFamily)")\
                              .Define("tau_genPartFlav", "GetTau(Tau_genPartFlav, Tau_idx)")\
                              .Define("lepton_isPrompt", "lepton_genPartFlav")\
                              .Define("tau_isPrompt", "tau_genPartFlav")

# Disabled step, kept for reference:
#df_corrections = corrections(df_tau)
df_definitions = general_definitions(df_tau)

# Theory-weight columns added on top of scale_factors():
#   * pdf_total_weights / QCDScale_weights: element [0] is used as the nominal
#     SF, [1] as Up and [2] as Down.
#   * ISRSF / FSRSF: nominal fixed to 1; Up/Down are read from PSWeight_weights
#     as ISRDown=[0], ISRUp=[1], FSRDown=[2], FSRUp=[3].
# NOTE(review): the element ordering is whatever the C++ helpers
# PdfWeight_variations, QCDScale_variations and PSWeight_variations return -
# confirm against their implementation.
df_SF_def= scale_factors(df_definitions).Define("pdf_total_weights", "PdfWeight_variations(LHEPdfWeight, Generator_weight, Sample)")\
                                        .Define("pdf_totalSF", "pdf_total_weights[0]")\
                                        .Define("pdf_totalUp", "pdf_total_weights[1]")\
                                        .Define("pdf_totalDown", "pdf_total_weights[2]")\
                                        .Define("QCDScale_weights", "QCDScale_variations(LHEScaleWeight)")\
                                        .Define("QCDScaleSF", "QCDScale_weights[0]")\
                                        .Define("QCDScaleUp", "QCDScale_weights[1]")\
                                        .Define("QCDScaleDown", "QCDScale_weights[2]")\
                                        .Define("ISRSF", "1.f")\
                                        .Define("FSRSF", "1.f")\
                                        .Define("PSWeight_weights", "PSWeight_variations(PSWeight)")\
                                        .Define("ISRUp", "PSWeight_weights[1]")\
                                        .Define("ISRDown", "PSWeight_weights[0]")\
                                        .Define("FSRUp", "PSWeight_weights[3]")\
                                        .Define("FSRDown", "PSWeight_weights[2]")\


# Declare the up/down variations of every weight column defined so far.
df_SF_var = SF_variations(df_SF_def)

# Final per-event weight "wzero" = w_SF * w_nominal, where
#   w_SF      = product of all nominal scale factors (each one varied by SF_variations)
#   w_nominal = HLT_effLumi * xsec * 1000 / Nevents
# NOTE(review): the factor 1000 is presumably a unit conversion between the
# luminosity and cross-section units returned by getLumi/getXSec - confirm.
df_SF = df_SF_var.Define("w_SF", "PrefireWeight*puWeight*lepSF*tau_vsjet_SF*tau_vsele_SF*tau_vsmu_SF*btagSF*pdf_totalSF*QCDScaleSF*ISRSF*FSRSF")\
                 .Define("Nevents", "getNevents(Sample, IsMC)")\
                 .Define("HLT_effLumi", "getLumi(Year, IsMC)")\
                 .Define("xsec", "getXSec(Sample, IsMC)")\
                 .Define("w_nominal", "HLT_effLumi*xsec* 1000./Nevents")\
                 .Define("wzero", "w_SF*w_nominal")
    
# Split by lepton/tau charge selection: same-sign (SS) vs opposite-sign (OS).
df_SS = df_SF.Filter("pass_charge_selection == true", "SameSign")
df_OS = df_SF.Filter("pass_charge_selection == false", "OppositeSign")

# SS events split by the MET cut.
df_SS_METCut_true = df_SS.Filter("pass_MET_cut == true", "MET cut true")
df_SS_METCut_false = df_SS.Filter("pass_MET_cut == false", "MET cut false")

# OS events passing the MET cut.
df_OS_METCut_true = df_OS.Filter("pass_MET_cut == true", "MET cut true")

# Naming: "bjets_true" means the b-veto FAILED (pass_b_veto == false) and
# "bjets_false" means the b-veto passed. All three filters share the label "Bveto".
# df_SS_METCut_true_bjets_true is not referenced again in this cell.
df_SS_METCut_true_bjets_true = df_SS_METCut_true.Filter("pass_b_veto == false", "Bveto")  # alternative: .Filter("nBJet > 0", "at least one b jets - medium")
df_SS_METCut_true_bjets_false = df_SS_METCut_true.Filter("pass_b_veto == true", "Bveto")  # alternative: .Filter("nBJet == 0", "no b jets - medium")

df_OS_METCut_true_bjets_true = df_OS_METCut_true.Filter("pass_b_veto == false", "Bveto")  # alternative: .Filter("nBJet > 0", "at least one b jets - medium")

# Aggregated samples to book histograms for: every group except Fake, Data and WZ.
to_plot_ = [
    sample_name
    for sample_name in aggregated_samples
    if sample_name not in ["Fake", "Data", "WZ"]
]

# Book the histograms for each analysis region. book_histos() is given the
# region's dataframe node, the region name, the shared dictionary `h` and the
# list of samples to book; nothing is computed until the results are accessed.

##### CR fakes: same-sign events failing the MET cut
df_FAKES = df_SS_METCut_false
book_histos(df_FAKES, 'fakes_CR', h, to_plot = to_plot_)
##### SR: same-sign, MET cut passed, b-veto passed, plus the m_jj cut
df_SR = df_SS_METCut_true_bjets_false.Filter("pass_mjj_cut == true", "mjj cut")
book_histos(df_SR, 'SR', h, to_plot = to_plot_)
#### CR ttbar: opposite-sign, MET cut passed, b-veto failed
df_TTBAR = df_OS_METCut_true_bjets_true
book_histos(df_TTBAR, 'ttbar_CR', h, to_plot = to_plot_)
#### CR opposite sign: opposite-sign events passing the loose b-veto (no MET requirement applied here)
df_OPPOSITESIGN = df_OS.Filter("pass_b_veto_loose == true", "Bveto")
book_histos(df_OPPOSITESIGN, 'OS_CR_bvetoL', h, to_plot = to_plot_)

Extract varied results.

In [7]:
def _variations_for(result):
    """Request nominal + varied results, using the API matching the execution mode."""
    if distributed == True:
        return ROOT.RDF.Experimental.Distributed.VariationsFor(result)
    return ROOT.RDF.Experimental.VariationsFor(result)


# Same nesting as `h` (region -> sample -> variable -> final state), with each
# booked histogram replaced by its VariationsFor result.
h_varied = {
    region: {
        sample: {
            feature: {
                final_state: _variations_for(histo[final_state])
                for final_state in ['etau', 'mutau']
            }
            for feature, histo in histos_dict.items()
        }
        for sample, histos_dict in samples_dict.items()
    }
    for region, samples_dict in h.items()
}

Save varied histograms. This triggers the event loop.

In [8]:
%%time
import math

# Systematic variations written to file. "nominal" must stay first:
# write_histo() treats index 0 as the nominal histogram and every other entry
# as a "<name>:up" / "<name>:down" pair.
variations = ["nominal", "jesTotal", "jer", "TES", "FES", "PF", "pu", "btag", "lep", "tau_vsjet", "tau_vsele", "tau_vsmu", "pdf_total", "QCDScale", "ISR", "FSR"]

# Output layout: ./<results_dir>/plots/{electron,muon}/<sample>.root
folder = "./"+ results_dir + "/plots"
# makedirs(exist_ok=True) creates missing parents and is a no-op when the
# directory already exists. This replaces the "name in os.listdir()" checks,
# which were racy and failed when results_dir contained a path separator.
for lepton_dir in ("electron", "muon"):
    os.makedirs(folder + "/" + lepton_dir, exist_ok=True)

def _fold_overflow_and_clip(hist):
    """Fold under/overflow into the edge bins and zero negative bins, in place.

    Bin 0 (underflow) is added to bin 1 and bin nbins+1 (overflow) to bin
    nbins, with errors combined in quadrature. Bins 0..nbins with negative
    content are then set to 0. Returns the number of visible bins.
    """
    nbins = hist.GetNbinsX()
    hist.SetBinContent(1, hist.GetBinContent(0) + hist.GetBinContent(1))
    hist.SetBinError(1, math.sqrt(pow(hist.GetBinError(0), 2) + pow(hist.GetBinError(1), 2)))
    hist.SetBinContent(nbins, hist.GetBinContent(nbins) + hist.GetBinContent(nbins + 1))
    hist.SetBinError(nbins, math.sqrt(pow(hist.GetBinError(nbins), 2) + pow(hist.GetBinError(nbins + 1), 2)))
    for i in range(0, nbins + 1):
        if hist.GetBinContent(i) < 0:
            hist.SetBinContent(i, 0.)
    return nbins


def write_histo(h_, to_plot, isData = False):
    """Write nominal and varied histograms to one ROOT file per sample and lepton flavour.

    Accessing the histograms triggers the event loop if it has not run yet.

    Parameters
    ----------
    h_ : dict
        Nested as h_[region][sample][variable][final_state]. For MC the leaf is
        a mapping with keys "nominal" and "<variation>:up" / "<variation>:down";
        for data it is an object exposing GetValue().
    to_plot : collection of str
        Sample names to write; any other sample in h_ is skipped.
    isData : bool
        When True only the nominal histogram is written.

    Uses the module-level names `folder` (output directory) and `variations`
    (list whose first entry is the nominal). Files are opened in 'UPDATE' mode
    at <folder>/<electron|muon>/<sample>.root.

    Histograms are modified in place (overflow folding, negative-bin clipping,
    renaming of varied histograms), so calling this twice on the same objects
    folds and renames them again.
    """
    for lepton, final_state in zip(['electron', 'muon'], ['etau', 'mutau']):
        for region, samples_dict in h_.items():
            print(region, final_state)
            for sample, histos_dict in samples_dict.items():
                if sample not in to_plot:
                    continue
                file = ROOT.TFile(folder + "/" + lepton + "/" + "{}.root".format(sample), 'UPDATE')
                # try/finally: the file must be closed even if a histogram
                # lookup or write fails, otherwise the handle leaks.
                try:
                    for n, var in enumerate(variations):
                        for feature, histo in histos_dict.items():
                            if n == 0:
                                # Nominal histogram.
                                if not isData:
                                    h1 = histo[final_state]['nominal']
                                else:
                                    h1 = histo[final_state].GetValue()
                                nbins = _fold_overflow_and_clip(h1)
                                if sample == "Fake":
                                    # Fake sample: bin error fixed to 30% of the bin content.
                                    for bin_index in range(1, nbins + 1):
                                        h1.SetBinError(bin_index, 0.3*h1.GetBinContent(bin_index))
                                h1.Write()
                            elif not isData:
                                # Systematic variations exist only for MC.
                                for var_type in ['up','down']:
                                    h1 = histo[final_state][var + ":" + var_type]
                                    # e.g. "<name>_jerUp" / "<name>_jerDown"
                                    h1.SetName(h1.GetName() + "_" + var + var_type.capitalize())
                                    _fold_overflow_and_clip(h1)
                                    h1.Write()
                finally:
                    file.Close()
    print()

#write_histo(h_varied, [s_ for s_ in aggregated_samples.keys() ])

CPU times: user 161 μs, sys: 0 ns, total: 161 μs
Wall time: 233 μs


In [9]:
%%time
# Write every aggregated sample except WZ; this is where the event loop actually runs.
samples_to_write = [sample_name for sample_name in aggregated_samples if sample_name != "WZ"]
write_histo(h_varied, samples_to_write)

fakes_CR etau
SR etau
ttbar_CR etau
OS_CR_bvetoL etau
fakes_CR mutau
SR mutau
ttbar_CR mutau
OS_CR_bvetoL mutau

CPU times: user 1.9 s, sys: 806 ms, total: 2.71 s
Wall time: 7min 44s
