In [1]:
# python
import sys
import importlib
# columnar analysis
from coffea import processor
from coffea.nanoevents import NanoAODSchema
import awkward as ak
from dask.distributed import Client, performance_report
# local package setup
# Everything in sys.path[0] before the first "/sidm" is treated as the directory
# that contains the sidm package; if "/sidm" is absent the whole entry is used.
sidm_path = str(sys.path[0]).partition("/sidm")[0]
if sidm_path not in sys.path:
    # position 1 keeps the existing first entry of sys.path in place
    sys.path.insert(1, sidm_path)
from sidm.tools import utilities, sidm_processor, scaleout
# Reload the local modules on every run of this cell so that edits made to them
# during development take effect without restarting the kernel.
importlib.reload(utilities)
importlib.reload(sidm_processor)
importlib.reload(scaleout)

<module 'sidm.tools.scaleout' from '/usr/local/lib/python3.10/site-packages/sidm/tools/scaleout.py'>

In [2]:
# Connect to the Dask scheduler at this address through the project's scaleout helper.
# NOTE(review): presumably returns a dask.distributed Client — confirm in sidm.tools.scaleout.
client = scaleout.make_dask_client("tls://localhost:8786")
# Bare last expression so the notebook renders the client summary
# (worth checking the reported worker count before launching a run).
client

0,1
Connection method: Direct,
Dashboard: /user/dongyub.lee@cern.ch/proxy/8787/status,

0,1
Comm: tls://192.168.121.138:8786,Workers: 0
Dashboard: /user/dongyub.lee@cern.ch/proxy/8787/status,Total threads: 0
Started: 5 hours ago,Total memory: 0 B


In [None]:
import coffea.util as coffea_util

# Signal samples to run over; later cells index the results by these names.
samples = [
    "2Mu2E_500GeV_5p0GeV_8p0mm",
    "2Mu2E_500GeV_5p0GeV_80p0mm",
]
# NOTE(review): max_files=-1 presumably means "no limit on files" — confirm in utilities.make_fileset.
fileset = utilities.make_fileset(
    samples,
    "llpNanoAOD_v2",
    max_files=-1,
    location_cfg="signal_2mu2e_v10.yaml",
)

p = sidm_processor.SidmProcessor(["2mu2e"], ["base"])

# Serialization check: save the processor with coffea's utilities, load it back, and print the result.
coffea_util.save(p, "processor.coffea")
print(coffea_util.load("processor.coffea"))

runner = processor.Runner(
    executor=processor.DaskExecutor(client=client),
    # Development toggles, left disabled:
    # executor=processor.IterativeExecutor(),
    # maxchunks=1,
    # skipbadfiles=True,
    schema=NanoAODSchema,
    # metrics are requested here; later cells read output["out"] and output["metrics"]
    savemetrics=True,
)

# Run inside dask's performance_report context, configured with filename "dask-report.html".
with performance_report(filename="dask-report.html"):
    output = runner.run(fileset, treename="Events", processor_instance=p)

<sidm.tools.sidm_processor.SidmProcessor object at 0x7f94689dcd30>
[                                        ] | 0% Completed |  1min 29.4s

In [None]:
# Keep the "out" entry of the run result under a short name; later cells index `out` by sample name.
out = output["out"]
# Show the 'cutflow' entry stored for the first sample.
print(out[samples[0]]['cutflow'])

In [5]:
# Check that the processor output can be accumulated: merge it with itself,
# so every count in `double_out` should be twice the corresponding one in output["out"].
from coffea.processor import accumulate
double_out = accumulate([output["out"]] * 2)

In [6]:
# "2mu2e" cutflow of the first sample: print the default table, then the fractional one.
cutflow_2mu2e = out[samples[0]]["cutflow"]["2mu2e"]
cutflow_2mu2e.print_table()
cutflow_2mu2e.print_table(fraction=True)

cut name         individual cut N    all cut N
-------------  ------------------  -----------
No selection             195420.0     195420.0
pass triggers             73253.0      73253.0
PV filter                190702.0      71300.0
>=2 LJs                   38243.0      15820.0
2mu2e                     38074.0      15755.0
cut name         individual %    marginal %    cumulative %
-------------  --------------  ------------  --------------
No selection            100.0         100.0           100.0
pass triggers            37.5          37.5            37.5
PV filter                97.6          97.3            36.5
>=2 LJs                  19.6          22.2             8.1
2mu2e                    19.5          99.6             8.1


In [7]:
# Same two tables for the self-accumulated output stored in `double_out`.
double_cutflow_2mu2e = double_out[samples[0]]["cutflow"]["2mu2e"]
double_cutflow_2mu2e.print_table()
double_cutflow_2mu2e.print_table(fraction=True)

cut name         individual cut N    all cut N
-------------  ------------------  -----------
No selection             390840.0     390840.0
pass triggers            146506.0     146506.0
PV filter                381404.0     142600.0
>=2 LJs                   76486.0      31640.0
2mu2e                     76148.0      31510.0
cut name         individual %    marginal %    cumulative %
-------------  --------------  ------------  --------------
No selection            100.0         100.0           100.0
pass triggers            37.5          37.5            37.5
PV filter                97.6          97.3            36.5
>=2 LJs                  19.6          22.2             8.1
2mu2e                    19.5          99.6             8.1


In [8]:
# Print one line per run metric; for "columns" print only the number of entries
# instead of the full collection.
for metric, value in output["metrics"].items():
    shown = len(value) if metric == "columns" else value
    print(f"{metric}: {shown}")

bytesread: 407694936
columns: 292
entries: 327707
processtime: 658.3683433532715
chunks: 92
