In [1]:
import uproot
import numpy as np
from scipy.stats import norm
from tqdm import tqdm as progress_bar
from pathlib import Path

This is the processor function. It works by making a `mat` array that looks like:

    [[-100.3 -100.2 -100.1 -100.   -99.9  -99.8  -99.7]
     [-100.2 -100.1 -100.   -99.9  -99.8  -99.7  -99.6]]
     
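Each column of this array holds the edges of one bin: row `[0,:]` is the lower
edge and row `[1,:]` is the upper edge. By adding a bin number (divided by 10)
to this array, you get the edges of that bin and of its three neighbours on each
side all at once, so they can be passed to the `cdf` function in a single
vectorized call.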

In [7]:
def proc(name, sd_1 = 0.1, in_dir = Path('/data/schreihf/PvFinder')):
    """Build the per-event target histograms for one kernel ROOT file.

    Opens ``in_dir / (name + '.root')``, reads the ``zdata``, ``pv_loc`` and
    ``pv_cat`` branches of its ``kernel`` tree and, for every event, adds one
    Gaussian per primary vertex with ``pv_cat == 1`` to a 4000-bin histogram.
    Bin ``k`` spans ``[-100 + 0.1*k, -100 + 0.1*(k + 1)]``. Each Gaussian is
    integrated over the bins with its CDF, and only the bin nearest to the
    vertex plus three neighbours on each side are filled. Bins of that window
    that would fall outside the histogram are dropped.

    Parameters
    ----------
    name : str
        File name without the ``.root`` extension; also used as the
        progress-bar label.
    sd_1 : float, optional
        Standard deviation of the Gaussian drawn for each ``pv_cat == 1``
        vertex.
    in_dir : pathlib.Path, optional
        Directory that contains the ROOT file.

    Returns
    -------
    X
        The ``zdata`` branch exactly as returned by uproot.
    Y : numpy.ndarray
        Array of shape ``(len(X), 4000)`` holding the target histograms.
    """
    filename = in_dir / (name + '.root')
    print("Loading", filename)
    tree = uproot.open(str(filename))['kernel']

    X = tree['zdata'].array()
    pv_loc = tree['pv_loc'].array()
    pv_cat = tree['pv_cat'].array()

    N_vals = len(X)
    n_bins = 4000
    # Centres of the first and of the last histogram bin.
    zvals_range = (-99.95, 299.95)
    Y = np.zeros([N_vals, n_bins])

    # mat[0, :] / mat[1, :] are the lower / upper edges of bins -3..3,
    # i.e. of the 7-bin window placed at bin number 0.
    edges = np.array([-0.05, 0.05])
    bins = np.arange(-3, 4)
    mat = 0.1*bins[np.newaxis,:] + edges[:,np.newaxis] - 99.95

    for i in progress_bar(range(N_vals), desc=name):
        # Centers of PVs
        pv_1 = pv_loc[i][pv_cat[i]==1]
        pv_1 = pv_1[(zvals_range[0] < pv_1) & (pv_1 < zvals_range[1])]

        for mean in pv_1:
            # Number of the bin whose centre is nearest to the vertex.
            N_bin = int(round((mean - zvals_range[0])*10))

            # Calling norm.cdf directly gives the same values as a frozen
            # norm(mean, sd_1) without building a new object per vertex.
            values = norm.cdf(N_bin/10 + mat, loc=mean, scale=sd_1)

            # A vertex close to either end of the range has a window that
            # sticks out of the histogram. Negative indices would wrap
            # around to the far end of the row and indices >= n_bins would
            # raise IndexError, so keep only the bins that exist.
            idx = bins + N_bin
            inside = (idx >= 0) & (idx < n_bins)
            Y[i, idx[inside]] += (values[1] - values[0])[inside]

    return X, Y

In [10]:
# Kernel files to process, given without the '.root' extension.
names = [
    'kernel_20180509',
    'kernel_20180510',
    'kernel_20180515',
    'kernel_20180516',
    'kernel_20180522',
    'kernel_20180719',
    'kernel_20180720_1',
    'kernel_20180720_2',
]

# Per-file results, collected in the same order as `names`.
Xs, Ys = [], []

for name in names:
    X, Y = proc(name)
    Xs.append(X)
    Ys.append(Y)

# Stack the per-file arrays along the first axis into single arrays.
X = np.concatenate(Xs, axis=0)
Y = np.concatenate(Ys, axis=0)

Loading /data/schreihf/PvFinder/kernel_20180509.root


kernel_20180509: 100%|██████████| 5000/5000 [00:20<00:00, 246.12it/s]


Loading /data/schreihf/PvFinder/kernel_20180510.root


kernel_20180510: 100%|██████████| 10000/10000 [00:40<00:00, 246.58it/s]


Loading /data/schreihf/PvFinder/kernel_20180515.root


kernel_20180515: 100%|██████████| 10000/10000 [00:41<00:00, 241.43it/s]


Loading /data/schreihf/PvFinder/kernel_20180516.root


kernel_20180516: 100%|██████████| 10000/10000 [00:40<00:00, 245.45it/s]


Loading /data/schreihf/PvFinder/kernel_20180522.root


kernel_20180522: 100%|██████████| 10000/10000 [00:40<00:00, 247.31it/s]


Loading /data/schreihf/PvFinder/kernel_20180719.root


kernel_20180719: 100%|██████████| 10000/10000 [00:40<00:00, 246.82it/s]


Loading /data/schreihf/PvFinder/kernel_20180720_1.root


kernel_20180720_1: 100%|██████████| 10000/10000 [00:40<00:00, 248.24it/s]


Loading /data/schreihf/PvFinder/kernel_20180720_2.root


kernel_20180720_2: 100%|██████████| 10000/10000 [00:40<00:00, 246.88it/s]


In [11]:
# Write both arrays into one compressed .npz archive under the keys 'X' and 'Y'.
np.savez_compressed(
    '/data/schreihf/PvFinder/July_31_75000.npz',
    X=X,
    Y=Y,
)

In [12]:
# Show the shapes of the combined arrays as a quick sanity check.
np.shape(X), np.shape(Y)

((75000, 4000), (75000, 4000))