In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np

#import pandas as pd
import modin.pandas as pd

from tqdm import tqdm
from scipy.special import erfc

from scipy.optimize import curve_fit
from scipy import stats

import warnings
# Globally silence every warning for the rest of the session.
# NOTE(review): this also hides fit/optimizer warnings from scipy — consider narrowing the filter.
warnings.filterwarnings('ignore')

# Colormap passed as `cmap=` to the 2D histograms further down.
# NOTE(review): cm_xml_to_matplotlib is not a standard package — presumably a local helper
# module; confirm that it and 'mellow-rainbow.xml' are available in the working directory.
import cm_xml_to_matplotlib as cm
mycmap = cm.make_cmap('mellow-rainbow.xml')
cmap = plt.get_cmap(mycmap)


In [2]:
def model(x, a, b, c):
    """
    Angular fit model: a constant plus cos(2x) and cos(x) harmonics.

    The coefficients are labelled as
        a => sigma_l + sigma_t
        b => epsilon*sigma_tt
        c => Sqrt(2epsilon(1+epsilon))* sigma_lt

    `x` may be a scalar or a numpy array (angle in radians, as consumed by np.cos).
    """
    cos_2x_term = b * np.cos(2*x)
    cos_x_term = c * np.cos(x)
    return a + cos_2x_term + cos_x_term

def degauss(x, A, mu, sigma, lambda1, lambda2):
    """
    Peak shape built from two exp(...) * erfc(...) terms, one per side of `mu`.

    `lambda1` controls the term growing with (x - mu) and `lambda2` the term
    decaying with (x - mu); the sum is scaled by A * 0.5 / (1/lambda1 + 1/lambda2).
    NOTE(review): presumably a Gaussian of width `sigma` convolved with a
    two-sided exponential — confirm against the analysis note.
    """
    sqrt2_sigma = sigma * np.sqrt(2.0)
    scale = A * 0.5 / (1.0 / lambda1 + 1.0 / lambda2)

    # Shifted arguments fed to the complementary error functions.
    mu1 = sigma * sigma * lambda1 + x - mu
    mu2 = -sigma * sigma * lambda2 + x - mu

    rising = np.exp(0.5 * np.power(sigma * lambda1, 2) + lambda1 * (x - mu)) * erfc(mu1 / sqrt2_sigma)
    falling = np.exp(0.5 * np.power(sigma * lambda2, 2) - lambda2 * (x - mu)) * erfc(-mu2 / sqrt2_sigma)

    return scale * (rising + falling)

def gauss(x, A, mu, sig):
    """Gaussian with peak height A, centre mu and width sig (not area-normalised)."""
    exponent = -np.power(x - mu, 2.) / (2 * np.power(sig, 2.))
    return A * np.exp(exponent)

def peak(x, c):
    """Unit-height bump centred at c: exp(-(x - c)**2 / 16)."""
    offset_sq = np.power(x - c, 2)
    return np.exp(-offset_sq / 16.0)

def lin_interp(x, y, i, half):
    """
    Linearly interpolate the x position at which y equals `half`
    on the segment between samples i and i+1.
    """
    x_lo, x_hi = x[i], x[i+1]
    y_lo, y_hi = y[i], y[i+1]
    fraction = (half - y_lo) / (y_hi - y_lo)
    return x_lo + (x_hi - x_lo) * fraction

def half_max_x(x, y):
    """
    Locate the first two x positions where y crosses half of its maximum.

    Parameters
    ----------
    x, y : sequences of equal length (sample positions and values).

    Returns
    -------
    list of two floats: the linearly interpolated x positions of the first and
    second half-maximum crossings (their difference is the FWHM for a single peak).

    Raises
    ------
    IndexError
        If y crosses the half-maximum level fewer than two times within the samples.
    """
    half = np.max(y)/2.0
    signs = np.sign(np.add(y, -half))
    # Compare every sample with its right-hand neighbour. The previous slices
    # (signs[0:-2] vs signs[1:-1]) never looked at the final pair, so a crossing
    # between the last two samples went undetected.
    zero_crossings = (signs[:-1] != signs[1:])
    zero_crossings_i = np.where(zero_crossings)[0]
    if len(zero_crossings_i) < 2:
        # Same exception type the bare indexing below would raise, with a usable message.
        raise IndexError(
            f"expected two half-maximum crossings, found {len(zero_crossings_i)}"
        )
    return [lin_interp(x, y, zero_crossings_i[0], half),
            lin_interp(x, y, zero_crossings_i[1], half)]


In [None]:
%%time


# Column names assigned to the CSV fields, in file order.
# NOTE: "helicty" (sic) is the spelling used for this column throughout the notebook.
names = [
    "electron_sector",
    "w",
    "q2",
    "theta",
    "phi",
    "mm2",
    "helicty",
    "type"
]
# Compact dtypes to keep the in-memory footprint small; "type" is left to inference.
dtype = {
    "electron_sector": "int8",
    "helicty": "int8",
    "w": "float32",
    "q2": "float32",
    "theta": "float32",
    "phi": "float32",
    "mm2": "float32",
}

# NOTE(review): machine-specific absolute path — move to a configurable data directory.
rec = pd.read_csv("/Users/tylern/Data/e1d/data/data_e1d.csv", names=names, dtype=dtype, index_col=False)

#rec = pd.read_feather("/Users/tylern/Data/ntuple/data_e1f.feather")

# Keep events with positive W and a missing mass squared inside (0.5, 1.5).
rec = rec[(rec.w > 0) & (rec.mm2 > 0.5) & (rec.mm2 < 1.5)]
# "hash" is not one of the columns declared in `names`, so a strict drop raises KeyError;
# errors="ignore" removes whichever of the two columns is actually present.
# Reassigning (instead of inplace=True) also avoids mutating a filtered view.
rec = rec.drop(columns=["type", "hash"], errors="ignore")
rec['cos_theta'] = np.cos(rec.theta)


rec.head()


In [None]:
# Short summary of `rec` (no per-column listing) with deep memory-usage accounting.
rec.info(verbose=False, memory_usage="deep")

In [None]:
# One density-normalised histogram per angular variable: theta first, then cos(theta).
for angular_values in (rec.theta, rec.cos_theta):
    fig, ax = plt.subplots(figsize=(12,9))
    ax.hist(angular_values, bins=100, alpha=0.5, label='data', density=True)
    ax.legend()
    plt.show()

In [None]:
# Density-normalised distribution of phi for the data sample.
fig, ax = plt.subplots(figsize=(12,9))
# Raw string: '\p' is not a valid escape sequence and triggers a warning in a normal
# string literal. The resulting label text is unchanged.
ax.hist(rec.phi, bins=100, alpha=0.5,  label=r'$\phi^{*}$ data', density=True)
ax.legend()
plt.show()

In [None]:
# Density-normalised missing-mass-squared distribution of the data sample.
fig, ax = plt.subplots(figsize=(12,9))

# The plotted frame is `rec` (data), so the legend entry says 'data' like the
# other histograms; the previous 'mc_rec' label was a leftover.
ax.hist(rec.mm2, bins=100, alpha=0.5, label='data', density=True)

ax.legend()
plt.show()

In [None]:
def mm_cut(df, nsigma=3):
    """
    Fit the mm2 peak in each electron sector and derive per-sector cut limits.

    For every sector 1..6 the `mm2` values are histogrammed (500 bins,
    density-normalised) and fitted twice: with `gauss`, and with `degauss`
    seeded from the Gaussian result. Both fits and their +/- nsigma lines are
    drawn, and the Gaussian parameters are printed as a brace-delimited list.

    Parameters
    ----------
    df : DataFrame with `electron_sector` and `mm2` columns.
    nsigma : float, default 3
        Half-width of the cut window in units of the fitted width.

    Returns
    -------
    dict
        sector -> (low, high) limits from the Gaussian fit, i.e.
        (mu - nsigma*|sigma|, mu + nsigma*|sigma|). Lower limit first, which is
        the order the cut-building cell indexes them in.

    Raises
    ------
    IndexError
        Propagated from `half_max_x` when the fitted `degauss` curve has fewer
        than two half-maximum crossings on the scanned range.
    """
    data = {}
    for sec in range(1, 7):
        plt.figure(figsize=(12,9))
        density, edges = np.histogram(df[df.electron_sector == sec].mm2, bins=500, density=True)
        centers = (edges[1:] + edges[:-1]) / 2

        # Plain Gaussian fit: defines the returned limits and seeds the degauss fit.
        popt_g, pcov_g = curve_fit(gauss, centers, density, maxfev=8000)
        mean_g = popt_g[1]
        # gauss() depends on its width only through sig**2, so the optimiser may
        # return a negative value; use the magnitude.
        sigma_g = abs(popt_g[2])
        low = mean_g - nsigma * sigma_g
        high = mean_g + nsigma * sigma_g

        plt.plot(centers, gauss(centers, *popt_g), linewidth=2.0)
        plt.errorbar(centers, density, yerr=stats.sem(density), fmt='.', zorder=1)

        plt.axvline(high)
        plt.axvline(low)

        # Start the 5-parameter fit from the Gaussian solution. This guess used to
        # be built but never handed to curve_fit, which then started from all ones.
        p0 = [popt_g[0], mean_g, sigma_g, 1.0, 1.0]
        popt, pcov = curve_fit(degauss, centers, density, p0=p0, maxfev=8000)

        plt.plot(centers, degauss(centers, *popt), c='#9467bd', linewidth=2.0)

        # find the FWHM
        xs = np.linspace(0.7, 1.5, 100000)
        hmx = half_max_x(xs, degauss(xs, *popt))
        fwhm = hmx[1] - hmx[0]
        # fwhm / 2.355 is the width of a Gaussian with the same FWHM (2.355 ~ 2*sqrt(2 ln 2)).
        plt.axvline(popt[1] + nsigma*fwhm/2.355, c='#9467bd')
        plt.axvline(popt[1] - nsigma*fwhm/2.355, c='#9467bd')

        plt.show()

        data[sec] = (low, high)

        # Gaussian parameters in "{ a, b, c,}" form with 20 decimals.
        print('{' + ''.join(f' {value:.20f},' for value in popt_g) + '}')

    return data

In [None]:
# Per-sector mm2 limits from the Gaussian fits (dict keyed by sector 1..6); also draws the fit plots.
sector_cuts = mm_cut(rec)

In [None]:
# Boolean mask selecting events inside their sector's mm2 window.
# Starting from False lets `|=` promote the mask to a Series on the first sector.
cuts = False
mc_cuts = False

for sec, min_max in sector_cuts.items():
    print(min_max)
    # Order-agnostic bounds: if the pair arrives as (high, low), the test
    # `mm2 >= pair[0] & mm2 <= pair[1]` can never be true and the mask stays empty.
    lower, upper = sorted(min_max)
    cuts |= ((rec.electron_sector == sec) & (rec.mm2 >= lower) & (rec.mm2 <= upper))
    # Number of events (all sectors) above this sector's lower limit.
    print(np.sum(rec.mm2 >= lower))
    #mc_cuts |= ((mc_rec.electron_sector == sec) & (mc_rec.mm2 >= min_max[0]) &  (mc_rec.mm2 <= min_max[1]))

# NOTE(review): the mask is built but not applied — the selection below is disabled.
#rec = rec[cuts]

In [None]:
# mc_rec = mc_rec[["w","q2","mm2","cos_theta","phi","helicty"]].copy(deep=True)
# mc_thrown = mc_thrown[["w","q2","mm2","cos_theta","phi","helicty"]].copy(deep=True)
# rec = rec[["w","q2","mm2","cos_theta","phi","helicty"]].copy(deep=True)

# rec.head()

In [None]:
# Bin edges for the kinematic binning.
# NOTE(review): np.arange excludes `stop`, so the last W edge is 1.775 and the last
# Q2 edge is 3.0; events above those edges are dropped below. Confirm that is intended.
w_bins = np.arange(1.0, 1.8, 0.025)
q2_bins = np.arange(1.0, 3.5, 0.5)
# cos(theta) spans the closed interval [-1, 1]. np.arange(-1.0, 1.0, 0.25) stops at
# 0.75 and would silently discard every event with cos(theta) > 0.75, so build the
# 8 bins of width 0.25 with linspace, which includes the upper edge.
theta_bins = np.linspace(-1.0, 1.0, 9)

rec['w_bin'] = pd.cut(rec['w'], bins=w_bins, include_lowest=True)
rec['q2_bin'] = pd.cut(rec['q2'], bins=q2_bins, include_lowest=True)
rec['theta_bin'] = pd.cut(rec['cos_theta'], bins=theta_bins, include_lowest=True)

# Values outside the edges get NaN from pd.cut; remove those rows.
# Reassignment instead of inplace=True keeps the cell safe to re-run on a filtered frame.
rec = rec.dropna()

In [None]:
# Fine-grained view of the W vs Q^2 coverage.
plt.hist2d(rec.w,rec.q2, cmap=cmap, bins=200)
plt.title("Data W vs $Q^2$")
plt.show()

# Same data in the analysis binning. Pass the bin edges themselves:
# (len(w_bins), len(q2_bins)) is the number of edges, i.e. one more than the number
# of bins, and would be spread over the data range rather than the chosen edges.
plt.hist2d(rec.w,rec.q2,bins=(w_bins, q2_bins), cmap=cmap)
plt.show()

In [None]:
# Preview the first rows of `rec` after the bin columns were added.
rec.head()

In [None]:
def draw_bsa(rec, func, n_phi_bins=10):
    """
    Plot the helicity asymmetry versus phi for every (W, Q^2, cos theta) bin.

    For each combination of `w_bin`, `q2_bin` and `theta_bin` values present in
    `rec`, events are split by `helicty` (+1 / -1), histogrammed in phi over
    [0, 2*pi], and the asymmetry (N+ - N-) / (N+ + N-) is computed per phi bin
    and fitted with `func`. A 2x2 figure is drawn per kinematic bin:
    positive-helicity counts, negative-helicity counts, the asymmetry, and the
    asymmetry with the fitted curve.

    Parameters
    ----------
    rec : DataFrame with `w_bin`, `q2_bin`, `theta_bin`, `phi` and `helicty` columns.
    func : callable `func(x, *params)` accepted by `curve_fit`.
    n_phi_bins : int, default 10
        Number of phi bins (this was a fixed 10 before).

    Returns
    -------
    None. Figures are shown and then closed.
    """
    xs = np.linspace(0, 2*np.pi, 100)
    for w in np.unique(rec.w_bin):
        for q2 in np.unique(rec.q2_bin):
            for cos_t in np.unique(rec.theta_bin):
                rec_cut = ((w == rec.w_bin) & (q2 == rec.q2_bin) & (cos_t == rec.theta_bin))

                data = rec[rec_cut]
                # A combination with no events would only yield an all-zero
                # asymmetry and a meaningless fit; skip it.
                if len(data) == 0:
                    continue
                pos = data[data.helicty == 1]
                neg = data[data.helicty == -1]

                fig, ax = plt.subplots(2, 2, figsize=(12,9))
                # '\\Theta' keeps the same title text while avoiding the invalid '\T' escape.
                fig.suptitle(f"W={w},\t$Q^2$={q2},\tcos($\\Theta$)={cos_t}")

                pos_y, pos_x = np.histogram(pos.phi, bins=n_phi_bins, range=(0, 2*np.pi))
                x = (pos_x[1:]+pos_x[:-1])/2.0
                neg_y, _ = np.histogram(neg.phi, bins=n_phi_bins, range=(0, 2*np.pi))

                # Change 0's to 1 for division
                num = pos_y - neg_y
                den = pos_y + neg_y
                den = np.where(den == 0, 1, den)

                ax[0][0].errorbar(x, pos_y, marker='.', yerr=stats.sem(pos_y),
                                  c='r', linestyle='', label='pos')
                ax[0][1].errorbar(x, neg_y, marker='.', yerr=stats.sem(neg_y),
                                  c='orange', linestyle='', label='neg')

                bsa = num/den
                ax[1][0].errorbar(x, bsa, yerr=stats.sem(bsa),
                                  marker='.', c='g', linestyle='', label='acceptance')

                popt, pcov = curve_fit(func, x, bsa, maxfev=8000)
                ax[1][1].errorbar(x, bsa, yerr=stats.sem(bsa), marker='.',
                                  linestyle='', c='k', zorder=1, label='corrected')

                # Draw the fit on the panel it belongs to, not on the implicit current axes.
                ax[1][1].plot(xs, func(xs, *popt), c='#9467bd', linewidth=2.0)

                fig.legend()
                plt.show()
                # One figure per kinematic bin: release it so figures do not pile up.
                plt.close(fig)


In [None]:
# Draw the asymmetry figures for every kinematic bin, fitting each with `model`.
draw_bsa(rec, model)

In [None]:
# Compare the W distributions of thrown and reconstructed Monte Carlo.
# Neither `mc_thrown` nor `mc_rec` is assigned in any visible cell of this notebook
# (the only other mentions are commented out), so on a fresh kernel this cell raised
# NameError. Plot only when both frames are actually loaded.
if "mc_thrown" in globals() and "mc_rec" in globals():
    plt.hist(mc_thrown.w, bins=200, alpha=0.2)
    plt.hist(mc_rec.w, bins=200, alpha=0.5)
    plt.show()
else:
    print("Skipping MC W comparison: mc_thrown / mc_rec are not loaded in this notebook.")

In [None]:
# Final preview of the first rows of `rec`.
rec.head()