In [None]:
# Shell escape: run `nvidia-smi` to list the NVIDIA GPUs visible on this machine.
!nvidia-smi

In [None]:
import torch
from pathlib import Path
import numpy as np
from collections import Counter
import matplotlib.pyplot as plt

(Don't worry if importing h5py gives a warning)

In [None]:
from model.models import SimpleCNN4Layer_D35_sp as Model
from model.collectdata import collect_data, collect_truth
from model.plots import plot_truth_vs_predict
from model.efficiency import efficiency, ValueSet, pv_locations
from model.training import select_gpu
from model.core import modernize

#### Device configuration

In [None]:
# Run everything on the CPU. To use a GPU instead, swap in the commented line
# below (select_gpu comes from model.training; its argument is presumably the
# GPU index -- confirm before use).
# device = select_gpu(2)
device = torch.device("cpu")

#### Load a dataset

In [None]:
# Validation file; passed to both collect_data and collect_truth below.
valfile = Path(
    '/share/lazy/schreihf/PvFinder/Oct03_20K_val.h5'
)

In [None]:
# Build the validation loader on `device`.
# NOTE(review): slice(10000) presumably restricts loading to the first
# 10,000 events -- confirm against model.collectdata.collect_data.
validation = collect_data(
    valfile,
    batch_size=1,
    device=device,
    slice=slice(10000),
)

In [None]:
# Instantiate the network, then place it on the selected device.
model = Model()
model = model.to(device)

#### Load the model weights

Old-style model checkpoints are modernized if needed.

In [None]:
# Load the saved weights directly onto `device`. Without map_location, a
# checkpoint that was saved from a GPU cannot be deserialized on a machine
# (or session) where CUDA is unavailable, even though the model lives on CPU.
state = torch.load(
    '/share/lazy/schreihf/PvFinder/models/Dec11_SimpleCNN4Layer_D35_sp_first200epochs_240K_lr_3em5_bs512_Alt_Loss_A_4p5_final.pyt',
    map_location=device,
)
# Convert an old-style state dict to the current layout.
# NOTE(review): the meaning of the second argument (3) is not visible here --
# confirm against model.core.modernize.
state = modernize(state, 3)
model.load_state_dict(state)
# Switch to inference mode; as the last expression this also displays the model.
model.eval()

#### Compute the model and move results to Numpy

In [None]:
%%time
with torch.no_grad():
    outputs = model(validation.dataset.tensors[0]).cpu().numpy()
    labels = validation.dataset.tensors[1].cpu().numpy()

#### Compute the efficiencies over the dataset

In [None]:
%%time
total = ValueSet(0,0,0,0)

for label, output in zip(labels, outputs):
    total += efficiency(label, output, 5., 1e-2, .2, 3)

In [None]:
# Raw totals, a blank line, then the human-readable summary.
print(total, "", total.pretty(), sep="\n")

#### Some examples of the counting algorithm:

In [None]:
# Show the efficiency result for each of the first 20 events.
for i in range(20):
    event_label, event_output = labels[i], outputs[i]
    result = efficiency(event_label, event_output, 5., 1e-2, .2, 3)
    print(f"{i:2}", result)

In [None]:
# Plot target vs. prediction for the first 20 events, titled with the
# efficiency result for that event.
for i in range(20):
    event_label, event_output = labels[i], outputs[i]
    result = efficiency(event_label, event_output, 5., 1e-2, .2, 3)
    ax = plot_truth_vs_predict(event_label, event_output)
    ax.set_title(str(result))

#### Adding nTracks

In [None]:
# Load the truth information from the validation file. The cells below
# index it per event as truth.z[i] and truth.n[i].
truth = collect_truth(valfile)

In [None]:
# Parameters passed to efficiency() and pv_locations() in the cells below.
difference = 5.0           # bins
threshold = 0.01
integral_threshold = 0.2
min_width = 3              # bins

In [None]:
# Inspect one event: print its efficiency result, then list every true PV
# with more than 4 tracks alongside the distance to the nearest predicted PV.
i = 2

print(efficiency(labels[i], outputs[i], difference, threshold, integral_threshold, min_width))

# Predicted PV positions, rescaled with /10 - 100.
# NOTE(review): presumably bin index -> z position; confirm the binning.
found_values = pv_locations(outputs[i], threshold, integral_threshold, min_width)/10-100

# Keep only the true PVs with more than 4 tracks.
valid = truth.n[i] > 4
zs = truth.z[i][valid]
ns = truth.n[i][valid]

print("Found nTracks Location\tDistance")
for z,n in zip(zs, ns):
    # initial=np.inf keeps the reduction defined when no PV was predicted for
    # this event: the true PV is then listed as not found (distance inf)
    # instead of np.min raising ValueError on an empty array.
    closest = np.min(np.abs(z - found_values), initial=np.inf)
    # `difference` is in bins; /10 puts it on the same scale as found_values.
    found = closest < difference/10
    print(f"{found!s:5} {n:7} {z:8.5}  {closest:8.5}")

# Same peak finding applied to the target histogram, displayed for comparison.
target_values = pv_locations(labels[i], threshold, integral_threshold, min_width)/10-100
target_values

In [None]:
# Cross-check on the first 100 events: count true PVs (more than 4 tracks)
# that have a predicted PV within `difference` bins, and print the events
# where that count disagrees with the S field of efficiency().
for i in range(100):

    eff = efficiency(labels[i], outputs[i], difference, threshold, integral_threshold, min_width)

    # Predicted PV positions, rescaled with /10 - 100.
    # NOTE(review): presumably bin index -> z position; confirm the binning.
    found_values = pv_locations(outputs[i], threshold, integral_threshold, min_width)/10-100

    valid = truth.n[i] > 4
    zs = truth.z[i][valid]

    total_found = 0
    for z in zs:
        # initial=np.inf makes an event with no predicted PVs count zero
        # matches instead of raising ValueError on the empty reduction.
        closest = np.min(np.abs(z - found_values), initial=np.inf)
        total_found += closest < difference/10

    if total_found != eff.S:
        print(i, total_found, eff.S)

In [None]:
# Over the whole dataset, tally found / missed true PVs (more than 4 tracks)
# keyed by their track count, and accumulate both the direct match count
# (total_found) and the S field reported by efficiency() (eff_found).
total_found = 0
eff_found = 0

pvs_successful = Counter()
pvs_failed = Counter()

for i in range(len(labels)):
    eff = efficiency(labels[i], outputs[i], difference, threshold, integral_threshold, min_width)

    # Predicted PV positions, rescaled with /10 - 100.
    # NOTE(review): presumably bin index -> z position; confirm the binning.
    found_values = pv_locations(outputs[i], threshold, integral_threshold, min_width)/10-100

    valid = truth.n[i] > 4
    zs = truth.z[i][valid]
    ns = truth.n[i][valid]

    for z,n in zip(zs, ns):
        # initial=np.inf keeps the reduction defined when nothing was
        # predicted for this event. Those true PVs are real misses and must be
        # counted in pvs_failed; skipping them would inflate the per-nTracks
        # efficiency computed from these counters.
        closest = np.min(np.abs(z - found_values), initial=np.inf)
        found = closest < difference/10
        total_found += found

        if found:
            pvs_successful[n] += 1
        else:
            pvs_failed[n] += 1

    eff_found += eff.S


In [None]:
# Relative difference between the two PV counts, normalized by their mean.
mean_found = (total_found + eff_found) / 2
(total_found - eff_found) / mean_found

In [None]:
# Reference summary recorded from an earlier run (presumably the output of
# total.pretty() above -- confirm):
# Found 49507 of 54804, added 5570 (eff 90.33%) (0.557 FP/event)
# Compare the directly counted matches with the count from efficiency().
print(total_found, eff_found)

In [None]:
# Dense per-nTracks counts for nTracks = 0..99; a Counter yields 0 for
# track counts that never occurred.
n_track_bins = range(100)
arr_successful = np.array([pvs_successful[k] for k in n_track_bins])
arr_failed = np.array([pvs_failed[k] for k in n_track_bins])

In [None]:
# Total PVs per nTracks bin; empty bins are masked so the efficiency ratio
# is left undefined there instead of dividing by zero.
raw_totals = arr_successful + arr_failed
arr_totals = np.ma.array(raw_totals, mask=(raw_totals == 0))

arr_eff = arr_successful / arr_totals

In [None]:
# Show the first 61 bins of the efficiency and of the PV totals.
for arr in (arr_eff, arr_totals):
    print(arr[:61])

In [None]:
# Two stacked panels sharing the same nTracks binning:
# efficiency per bin on top, number of PVs per bin below.
fig, (ax_eff, ax_count) = plt.subplots(2, 1, figsize=(10, 10))
left_edges = np.arange(61)

ax_eff.bar(left_edges, arr_eff[:61], width=1, align='edge')
ax_eff.set_xlabel('nTracks')
ax_eff.set_ylabel('Efficiency')
ax_eff.set_ylim(.6, 1.01)
ax_eff.set_xlim(5, 61)
ax_eff.grid()

ax_count.bar(left_edges, arr_totals[:61], width=1, align='edge')
ax_count.set_xlabel('nTracks')
ax_count.set_ylabel('PVs')
ax_count.set_xlim(5, 61)
ax_count.grid()

plt.show()