In [1]:
# python
import sys
import importlib
# columnar analysis
from coffea import processor
import awkward as ak
from dask.distributed import Client, performance_report
# local: put the directory that precedes the first "/sidm" in sys.path[0] on
# the import path so that the `sidm` package below can be resolved
sidm_path = str(sys.path[0]).partition("/sidm")[0]
if sidm_path not in sys.path:
    sys.path.insert(1, sidm_path)
from sidm.tools import (
    utilities,
    sidm_processor,
    llpnanoaodschema,
    scaleout,
)
# re-import the local modules each time this cell runs so that edits made
# during development are picked up without restarting the kernel
importlib.reload(utilities)
importlib.reload(sidm_processor)
importlib.reload(llpnanoaodschema)
# last expression of the cell: its return value is what the notebook displays
importlib.reload(scaleout)

<module 'sidm.tools.scaleout' from '/home/cms-jovyan/SIDM/sidm/tools/scaleout.py'>

In [2]:
# create the Dask client through the project's scaleout helper; the client is
# handed to the DaskExecutor when the processor is run
client = scaleout.make_dask_client("tls://localhost:8786")
# bare expression so that the notebook renders the client summary
client

0,1
Connection method: Direct,
Dashboard: /user/bryan.cardwell@cern.ch/proxy/8787/status,

0,1
Comm: tls://192.168.197.237:8786,Workers: 0
Dashboard: /user/bryan.cardwell@cern.ch/proxy/8787/status,Total threads: 0
Started: 1 hour ago,Total memory: 0 B


In [3]:
# samples to process; later cells look results up via samples[0]
samples = [
    '2Mu2E_500GeV_5p0GeV_8p0mm',
    '2Mu2E_500GeV_5p0GeV_80p0mm',
]
# NOTE(review): max_files=-1 presumably means "use every available file" --
# confirm against utilities.make_fileset
fileset = utilities.make_fileset(samples, "llpNanoAOD_v2", max_files=-1, location_cfg="signal_2mu2e_v10.yaml")

# NOTE(review): the two positional lists look like channel names and histogram
# collection names -- confirm against SidmProcessor.__init__
p = sidm_processor.SidmProcessor(
    ["2mu2e"],
    ["base"],
)

# test if processor is serializable by round-tripping it through coffea's
# save/load. The file is written inside a temporary directory so that running
# this cell does not leave a stray processor.coffea in the working directory.
import os
import tempfile
import coffea.util as coffea_util
with tempfile.TemporaryDirectory() as scratch_dir:
    processor_path = os.path.join(scratch_dir, "processor.coffea")
    coffea_util.save(p, processor_path)
    print(coffea_util.load(processor_path))

# the commented-out options are kept as switches for local debugging
runner = processor.Runner(
    #executor=processor.IterativeExecutor(),
    executor=processor.DaskExecutor(client=client),
    schema=llpnanoaodschema.LLPNanoAODSchema,
    #maxchunks=1,
    #skipbadfiles=True,
    savemetrics=True,
)

# record a Dask performance report for the run alongside the notebook
with performance_report(filename="dask-report.html"):
    output = runner.run(fileset, treename="Events", processor_instance=p)

<sidm.tools.sidm_processor.SidmProcessor object at 0x7fe835679fa0>
Signal not in xs cfg, assuming 1fb#######] | 100% Completed |  3min 46.7s[2K
Signal not in xs cfg, assuming 1fb


In [4]:
# keep a handle on the "out" entry of the runner output; later cells read
# the per-sample results from it
out = output["out"]
# show the cutflow entry stored for the first sample
print(out[samples[0]]['cutflow'])

{'2mu2e': <sidm.tools.cutflow.Cutflow object at 0x7fe832f80cb0>}


In [5]:
# test that output is accumulatable: combine the processor results with
# themselves; the doubled yields are printed in a later cell
from coffea.processor import accumulate
double_out = accumulate([output["out"]] * 2)

In [6]:
# 2mu2e cutflow of the first sample: default table first, then the
# fraction=True variant
first_sample_cutflow = out[samples[0]]["cutflow"]["2mu2e"]
first_sample_cutflow.print_table()
first_sample_cutflow.print_table(fraction=True)

cut name         individual cut N    all cut N
-------------  ------------------  -----------
No selection                 59.8         59.8
pass triggers                22.4         22.4
PV filter                    59.8         22.4
>=2 LJs                      34.0         15.4
2mu2e                        33.9         15.4
cut name         individual %    marginal %    cumulative %
-------------  --------------  ------------  --------------
No selection            100.0         100.0           100.0
pass triggers            37.5          37.5            37.5
PV filter               100.0         100.0            37.5
>=2 LJs                  56.9          68.8            25.8
2mu2e                    56.7          99.8            25.7


In [7]:
# same two tables for the self-accumulated results, to compare against the
# single-copy tables
doubled_cutflow = double_out[samples[0]]["cutflow"]["2mu2e"]
doubled_cutflow.print_table()
doubled_cutflow.print_table(fraction=True)

cut name         individual cut N    all cut N
-------------  ------------------  -----------
No selection                119.7        119.7
pass triggers                44.9         44.9
PV filter                   119.7         44.9
>=2 LJs                      68.1         30.9
2mu2e                        67.8         30.8
cut name         individual %    marginal %    cumulative %
-------------  --------------  ------------  --------------
No selection            100.0         100.0           100.0
pass triggers            37.5          37.5            37.5
PV filter               100.0         100.0            37.5
>=2 LJs                  56.9          68.8            25.8
2mu2e                    56.7          99.8            25.7


In [8]:
# print each run metric; for "columns" only the number of entries is shown
# instead of the full collection
for metric_name, metric_value in output["metrics"].items():
    shown = len(metric_value) if metric_name == "columns" else metric_value
    print(f"{metric_name}: {shown}")

bytesread: 470681799
columns: 301
entries: 327707
processtime: 1953.3557016849518
chunks: 92
