# The difference between the Edelweiss resolution function and the true Yield variance

## The problem
We perform the fit to the data using an approximation to the yield variance, not the true yield variance.  This raises the question: does this approximation significantly impact our answer?

## What this notebook investigates
This notebook samples the posterior distribution and, for each parameter set sampled, stores the difference between the true yield and the estimate used by the fit.

This notebook focuses on a single energy.

In [1]:
import matplotlib.pyplot as plt
%matplotlib inline
import h5py
import numpy as np
import pandas as pd
from astropy.table import Table, Column, vstack
from astropy.io.misc.hdf5 import read_table_hdf5, write_table_hdf5

import sys
sys.path.append('../python/')
from EdwRes import *
from prob_dist import *
from checkDifference_yieldVariance import *

GGA3/NR/4.0/5.556E-02/0.0380/000/0.1493/0.1782/0.9975/
GGA3/NR/4.0/5.556E-02/0.0381/000/0.1537/0.1703/0.9948/


In [2]:
# We'll look at the Er values of the data points.
# Import the NR-width data from Edelweiss; the recoil energies of these points
# determine which stored yield-accuracy files are opened by the next cell.
resNR_data = pd.read_csv(
    "data/edelweiss_NRwidth_GGA3_data.txt",
    skiprows=1,
    names=['E_recoil', 'sig_NR', 'E_recoil_err', 'sig_NR_err'],
    # sep=r'\s+' is the documented replacement for the deprecated
    # delim_whitespace=True and parses the file the same way.
    sep=r'\s+',
)

# the sorting is necessary!
# otherwise the [2::] selection below will pick the wrong data points
resNR_data = resNR_data.sort_values(by='E_recoil')

# Skip the first two rows of the energy-sorted table and keep the rest.
NR_data = {
    'Erecoil': resNR_data["E_recoil"][2::],
    'sigma': resNR_data["sig_NR"][2::],
    'sigma_err': resNR_data["sig_NR_err"][2::],
}
Er = np.sort(NR_data['Erecoil'])
Erecoil = Er[0]

# One yield-accuracy file per selected recoil energy.
# NOTE(review): the loop variable rebinds Erecoil, so after this cell Erecoil
# holds the last energy of NR_data['Erecoil'] rather than Er[0] -- confirm
# which value later code expects before renaming the loop variable.
filenames = []
for Erecoil in NR_data['Erecoil']:
    filename = 'data/yield_accuracy_Erecoil_%.2f_keV_all_corrAB_Aug2.h5' % Erecoil
    filenames.append(filename)

print(filenames)

['data/yield_accuracy_Erecoil_24.50_keV_all_corrAB_Aug2.h5', 'data/yield_accuracy_Erecoil_34.22_keV_all_corrAB_Aug2.h5', 'data/yield_accuracy_Erecoil_44.26_keV_all_corrAB_Aug2.h5', 'data/yield_accuracy_Erecoil_58.40_keV_all_corrAB_Aug2.h5', 'data/yield_accuracy_Erecoil_97.72_keV_all_corrAB_Aug2.h5']


In [3]:
def _read_yield_table(path):
    """Read the 'table' key of an HDF5 file into a dataframe and add 'Yield'.

    The 'Yield' column is computed row by row as
    A * energy_recoil_keV ** B from the columns of those names.
    """
    table = pd.read_hdf(path, key='table')
    table['Yield'] = table['A'] * np.power(table['energy_recoil_keV'], table['B'])
    return table


# One pandas dataframe per recoil energy, in the order of `filenames`.
df_24keV, df_34keV, df_44keV, df_58keV, df_97keV = (
    _read_yield_table(filenames[idx]) for idx in range(5)
)

In [4]:
# Linear model of the difference (true_yield_sig - cor1_yield_sig) stored in
# df_24keV as a function of the parameters aH, scale, A and B.
from sklearn.linear_model import LinearRegression

# Keep only the rows for which true_yield_sig is present; filter once.
mask = df_24keV['true_yield_sig'].notnull()
valid_24keV = df_24keV.loc[mask]
y = valid_24keV['true_yield_sig'] - valid_24keV['cor1_yield_sig']
X = valid_24keV[['aH', 'scale', 'A', 'B']]

reg = LinearRegression().fit(X, y)

# R^2 of the linear model evaluated on its own training data
print(reg.score(X, y))

print ("coefficents: ", reg.coef_)
print ("intercept: ", reg.intercept_)

# Evaluate the model at one specific parameter point.
aH, scale, A, B = 0.0381134613, 0.994778557, 0.153737587, 0.170327657
X0 = np.array([[aH, scale, A, B]])
# The model was fitted on a DataFrame, so predict on a frame carrying the same
# column names; a bare array makes scikit-learn (>= 1.0) warn that X does not
# have valid feature names.
print(reg.predict(pd.DataFrame(X0, columns=X.columns)))


# Cross-check: rebuild the prediction by hand from the fitted coefficients,
# printing every term of the sum.
predicted=0
print('CALCULATION')
print('intercept: {}'.format(reg.intercept_))
for coef, x0 in zip(reg.coef_, X0[0]):
    term = coef*x0
    print('coef X X0 = {:01.7f} X {:01.7f} = {:01.7f}'.format(coef, x0, term))
    predicted += term

predicted+=reg.intercept_
print(predicted)

0.9881916830492861
coefficents:  [0.00552431 0.00133703 0.01633244 0.02117115]
intercept:  -0.007662520580006483
[-4.98203281e-06]
CALCULATION
intercept: -0.007662520580006483
coef X X0 = 0.0055243 X 0.0381135 = 0.0002106
coef X X0 = 0.0013370 X 0.9947786 = 0.0013300
coef X X0 = 0.0163324 X 0.1537376 = 0.0025109
coef X X0 = 0.0211711 X 0.1703277 = 0.0036060
-4.982032810407766e-06


In [5]:
# Linear model of the difference (true_yield_sig - cor1_yield_sig) stored in
# df_34keV as a function of the parameters aH, scale, A and B.
# Keep only the rows for which true_yield_sig is present; filter once.
mask = df_34keV['true_yield_sig'].notnull()
valid_34keV = df_34keV.loc[mask]
y = valid_34keV['true_yield_sig'] - valid_34keV['cor1_yield_sig']
X = valid_34keV[['aH', 'scale', 'A', 'B']]

reg = LinearRegression().fit(X, y)

# R^2 of the linear model evaluated on its own training data
print(reg.score(X, y))

print ("coefficents: ", reg.coef_)
print ("intercept: ", reg.intercept_)

# Evaluate the model at one specific parameter point.
aH, scale, A, B = 0.0381134613, 0.994778557, 0.153737587, 0.170327657
X0 = np.array([[aH, scale, A, B]])
# The model was fitted on a DataFrame, so predict on a frame carrying the same
# column names; a bare array makes scikit-learn (>= 1.0) warn that X does not
# have valid feature names.
print(reg.predict(pd.DataFrame(X0, columns=X.columns)))


# Cross-check: rebuild the prediction by hand from the fitted coefficients.
predicted=0
for coef, x0 in zip(reg.coef_, X0[0]):
    predicted += coef*x0

predicted+=reg.intercept_
print(predicted)

0.9883018033806901
coefficents:  [0.00891606 0.00139528 0.013981   0.01819466]
intercept:  -0.006976876162577476
[-5.97786977e-07]
-5.977869772712946e-07


In [6]:
# Linear model of the difference (true_yield_sig - cor1_yield_sig) stored in
# df_44keV as a function of the parameters aH, scale, A and B.
# Keep only the rows for which true_yield_sig is present; filter once.
mask = df_44keV['true_yield_sig'].notnull()
valid_44keV = df_44keV.loc[mask]
y = valid_44keV['true_yield_sig'] - valid_44keV['cor1_yield_sig']
X = valid_44keV[['aH', 'scale', 'A', 'B']]

reg = LinearRegression().fit(X, y)

# R^2 of the linear model evaluated on its own training data
print(reg.score(X, y))

print ("coefficents: ", reg.coef_)
print ("intercept: ", reg.intercept_)

# Evaluate the model at one specific parameter point.
aH, scale, A, B = 0.0381134613, 0.994778557, 0.153737587, 0.170327657
X0 = np.array([[aH, scale, A, B]])
# The model was fitted on a DataFrame, so predict on a frame carrying the same
# column names; a bare array makes scikit-learn (>= 1.0) warn that X does not
# have valid feature names.
print(reg.predict(pd.DataFrame(X0, columns=X.columns)))


# Cross-check: rebuild the prediction by hand from the fitted coefficients.
predicted=0
for coef, x0 in zip(reg.coef_, X0[0]):
    predicted += coef*x0

predicted+=reg.intercept_
print(predicted)

0.9870510077511788
coefficents:  [0.01268575 0.00128421 0.01304974 0.01676499]
intercept:  -0.006621463867453043
[1.31282427e-06]
1.312824273716831e-06


In [7]:
# Linear model of the difference (true_yield_sig - cor1_yield_sig) stored in
# df_58keV as a function of the parameters aH, scale, A and B.
# Keep only the rows for which true_yield_sig is present; filter once.
mask = df_58keV['true_yield_sig'].notnull()
valid_58keV = df_58keV.loc[mask]
y = valid_58keV['true_yield_sig'] - valid_58keV['cor1_yield_sig']
X = valid_58keV[['aH', 'scale', 'A', 'B']]

reg = LinearRegression().fit(X, y)

# R^2 of the linear model evaluated on its own training data
print(reg.score(X, y))

print ("coefficents: ", reg.coef_)
print ("intercept: ", reg.intercept_)

# Evaluate the model at one specific parameter point.
aH, scale, A, B = 0.0381134613, 0.994778557, 0.153737587, 0.170327657
X0 = np.array([[aH, scale, A, B]])
# The model was fitted on a DataFrame, so predict on a frame carrying the same
# column names; a bare array makes scikit-learn (>= 1.0) warn that X does not
# have valid feature names.
print(reg.predict(pd.DataFrame(X0, columns=X.columns)))


# Cross-check: rebuild the prediction by hand from the fitted coefficients.
predicted=0
for coef, x0 in zip(reg.coef_, X0[0]):
    predicted += coef*x0

predicted+=reg.intercept_
print(predicted)

0.9848943746599956
coefficents:  [0.0195214  0.00114802 0.01256059 0.01603726]
intercept:  -0.00654459822082259
[4.08033284e-06]
4.080332840841749e-06


In [8]:
# Linear model of the difference (true_yield_sig - cor1_yield_sig) stored in
# df_97keV as a function of the parameters aH, scale, A and B.
# Keep only the rows for which true_yield_sig is present; filter once.
mask = df_97keV['true_yield_sig'].notnull()
valid_97keV = df_97keV.loc[mask]
y = valid_97keV['true_yield_sig'] - valid_97keV['cor1_yield_sig']
X = valid_97keV[['aH', 'scale', 'A', 'B']]

reg = LinearRegression().fit(X, y)

# R^2 of the linear model evaluated on its own training data
print(reg.score(X, y))

print ("coefficents: ", reg.coef_)
print ("intercept: ", reg.intercept_)

# Evaluate the model at one specific parameter point.
aH, scale, A, B = 0.0381134613, 0.994778557, 0.153737587, 0.170327657
X0 = np.array([[aH, scale, A, B]])
# The model was fitted on a DataFrame, so predict on a frame carrying the same
# column names; a bare array makes scikit-learn (>= 1.0) warn that X does not
# have valid feature names.
print(reg.predict(pd.DataFrame(X0, columns=X.columns)))


# Cross-check: rebuild the prediction by hand from the fitted coefficients.
predicted=0
for coef, x0 in zip(reg.coef_, X0[0]):
    predicted += coef*x0

predicted+=reg.intercept_
print(predicted)

0.9820387798382159
coefficents:  [0.02973745 0.00088643 0.01310807 0.01668303]
intercept:  -0.006859726936860776
[1.22579782e-05]
1.2257978188948138e-05


In [9]:
# Pull in everything exported by edw_data_util; getERNR is presumably one of
# those names (the star import hides where each name comes from).
from edw_data_util import *
# NOTE: this rebinds NR_data, replacing the dict that was built from the CSV
# file earlier in the notebook.
ER_data, NR_data = getERNR()

In [10]:
# Show the recoil energies held under the 'Erecoil' key of NR_data.
print(NR_data['Erecoil'])

3    24.5012
6    34.2156
2    44.2627
5    58.4014
4    97.7172
Name: E_recoil, dtype: float64


In [11]:
# Reload prob_dist so that edits to the module are picked up, then evaluate
# series_NRQ_sig_c2 at a single recoil energy and parameter point.
# NOTE(review): the alias `pd` rebinds the name used for pandas earlier in this
# notebook. It is kept here because the following cell calls prob_dist through
# `pd`; cells that need pandas cannot be re-run after this one without
# re-importing pandas. Consider renaming the alias in both cells together.
import importlib  # replaces `imp`, which is deprecated and removed in Python 3.12
import prob_dist as pd
importlib.reload(pd)

En = 24.5  # value passed as Er below; presumably keV -- TODO confirm

aH, scale, A, B = 0.0381134613, 0.994778557, 0.153737587, 0.170327657
Vmod = 4.0*scale
corr2 = pd.series_NRQ_sig_c2(Er=En,F=0.0,V=Vmod,aH=aH,alpha=(1/18.0),A=A,B=B,label='GGA3',verbose=True)

print(corr2)

[0.00552431 0.00133703 0.01633244 0.02117115]
-0.007662520580006483
intercept: -0.007662520580006483
coef X X0 = 0.0055243 X 0.0381135 = 0.0002106
coef X X0 = 0.0013370 X 0.9947786 = 0.0013300
coef X X0 = 0.0163324 X 0.1537376 = 0.0025109
coef X X0 = 0.0211711 X 0.1703277 = 0.0036060
-4.978939628740367e-06


In [12]:
# Evaluate the three series_NRQ_var* functions of prob_dist (bound to `pd` by
# the previous cell) with one shared set of keyword arguments.
nrq_kwargs = dict(Er=En, F=0.0, V=Vmod, aH=aH, alpha=(1/18.0), A=A, B=B, label='GGA3')

var0 = pd.series_NRQ_var(**nrq_kwargs)
var1 = pd.series_NRQ_var_corr1(**nrq_kwargs)
var2 = pd.series_NRQ_var_corr2(verbose=True, **nrq_kwargs)

for variance in (var0, var1, var2):
    print(variance)
# difference between the square roots of the corr1 and corr2 results
print(np.sqrt(var1)-np.sqrt(var2))

[0.00552431 0.00133703 0.01633244 0.02117115]
-0.007662520580006483
intercept: -0.007662520580006483
coef X X0 = 0.0055243 X 0.0381135 = 0.0002106
coef X X0 = 0.0013370 X 0.9947786 = 0.0013300
coef X X0 = 0.0163324 X 0.1537376 = 0.0025109
coef X X0 = 0.0211711 X 0.1703277 = 0.0036060
0.002268664529354329
0.0024378325770850356
0.0024373409374400827
4.978939628738632e-06
