In [93]:
import numpy as np
import pandas as pd
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
plt.close('all')
import scipy.optimize as opt
import scipy

import xlwings as xw
import os

# Efficiency at low activities (<0.05 $\mu$Ci)

The goal is to investigate the efficiency of the Hidex at low activities, which requires reducing the noise in the measurements. To do this:
1. Find an appropriate activity (low noise) to measure the FWHM of both peaks (511 keV and 1022 keV)
2. Reprocess all data from set 4 that is <0.1 $\mu$Ci and extract the counts for both peaks within $\pm$ FWHM of each peak centre
3. Pass the above counts to the efficiency script

## 1. Find the FWHM of both peaks

In [94]:
# Get the data from Excel. (NOTE: the original files aren't well-formatted, so the spectra
# were extracted by hand into this workbook. Step 2 needs to work directly with the raw
# Excel files, which is handled further down.)

# The context manager releases the file handle even if read_excel raises.
with open("./data/HidexAMG-Track_30min-035-20240502-013749-AutoExport_SPECTRA.xlsx", "rb") as f:
    sample_spectra = pd.read_excel(f,
                                   engine='openpyxl')

sample_spectra = sample_spectra.to_numpy()[:, 2:] # ignores the rack and vial numbers (first two columns)

In [95]:
def gauss(x, p):
    """Evaluate a Gaussian profile at ``x``.

    ``p`` is an indexable triple: ``p[0]`` is the mean, ``p[1]`` the standard
    deviation and ``p[2]`` the normalization factor (the value at the mean).
    """
    centre, width, height = p[0], p[1], p[2]
    exponent = -(x-centre)**2/(2*width**2)
    return height*np.exp(exponent)

FWHM_511_list = []
FWHM_1022_list = []

# A Gaussian's full width at half maximum is 2*sqrt(2*ln 2) times its standard deviation.
SIGMA_TO_FWHM = 2*np.sqrt(2*np.log(2))

# Channel axis shared by every fit (one value per spectrum column); built once, not per row.
channels = np.arange(sample_spectra.shape[1], dtype=float)


def _fit_peak(x, y, centre_guess):
    """Least-squares fit of `gauss` to the spectrum `y` sampled at `x`.

    The optimiser starts from [centre_guess, 1, 2000] (mean, stdev, height).
    Returns ``(params, fwhm)``: the fitted parameter array and the FWHM
    derived from the fitted standard deviation.
    """
    import warnings

    start = [centre_guess, 1, 2000]
    params, ier = opt.leastsq(lambda p, x, y: gauss(x, p) - y, start, args=(x, y))
    # leastsq reports a found solution with ier in 1..4; anything else means the fit failed.
    if ier not in (1, 2, 3, 4):
        warnings.warn("Gaussian fit near %s did not converge (ier=%s)" % (centre_guess, ier))
    # gauss() only depends on stdev**2, so the optimiser may return a negative stdev;
    # abs() keeps the reported width positive.
    return params, SIGMA_TO_FWHM*abs(params[1])


for i in range(0, 10):
    Y = sample_spectra[i, :]
    plt.plot(Y)

    # Fit a Gaussian to the 511 keV peak, then to the 1022 keV peak, overlaying each fit.
    for centre_guess, fwhm_list in ((511, FWHM_511_list), (1022, FWHM_1022_list)):
        params, FWHM = _fit_peak(channels, Y, centre_guess)
        fwhm_list.append(FWHM)
        plt.plot(gauss(channels, params))

plt.xlabel("Channel (keV)")
plt.ylabel("Counts")
plt.grid()
plt.show()

In [96]:
# Average FWHM of each peak over the fitted spectra, with the spread (np.std) as its uncertainty.
FWHM_511, s_FWHM_511 = np.mean(FWHM_511_list), np.std(FWHM_511_list)
FWHM_1022, s_FWHM_1022 = np.mean(FWHM_1022_list), np.std(FWHM_1022_list)

# Report the 1022 peak first, then the 511 peak.
for width, spread in ((FWHM_1022, s_FWHM_1022), (FWHM_511, s_FWHM_511)):
    print(width, "+-", spread)

54.11739367081469 +- 0.6751569326997817
39.3216609300854 +- 0.2736845499617138


## 2. Generate the counts of the low-activity data with the new windows
1. pick files with low enough activity and move them to the correct subfolder
2. use python to find the spectra in these files, then add up the counts in the correct windows (also calculate the 400-600 window to compare)

In [128]:
# Start with one file, then turn it into a function.
# NOT THE FINAL VERSION (SEE BELOW)
# Path is relative to the notebook (same folder the rest of the notebook reads from), so the
# cell does not depend on one user's absolute directory layout.
filepath = "./data/low_activity_files/HidexAMG-Track_30min-035-20240502-110624-AutoExport.xlsx"

wb = xw.Book(filepath)
try:
    spectra = wb.sheets['Spectra']["C20"].options(np.array, expand='table').value
finally:
    wb.close()  # don't leave the workbook open in Excel

p1_start, p1_end, p2_start, p2_end = int(511 - FWHM_511), int(511 + FWHM_511), int(1022 - FWHM_1022), int(1022 + FWHM_1022) #peak edges
# Named counts_peaks (not `counts`) so this prototype cannot clobber the counts() function defined below.
counts_peaks = np.sum(spectra[:, p1_start:p1_end+1], axis=1) + np.sum(spectra[:, p2_start:p2_end+1], axis=1) # add up the counts in both peaks
counts_old = np.sum(spectra[:, 400:601], axis=1) # sanity check, these are the same as the old data

print(p1_start, p1_end, p2_start, p2_end)

471 550 967 1076


## 2.1 Remove background spectra
The background counts are not the same in every energy channel. We therefore build a less noisy background spectrum by averaging 14 background files taken at different times of the day, and subtract it from the measured sample spectra.

In [147]:
# note: not using data file HidexAMG-Background Long-016-20230608-121915-AutoExport because it is unusually flat compared to the other ones
bg_path = "./data/backgrounds/"
BG_CPM_DIVISOR = 30  # the raw background spectra are divided by this to express them in CPM

# Top-level files of the background folder, sorted so the order is deterministic.
bg_files = sorted(next(os.walk(bg_path), (bg_path, [], []))[2])
# Disregard temporary files (names containing "~") *before* sizing the array, so that
# skipped files cannot leave all-zero rows behind that would drag the average down.
bg_valid_files = [file for file in bg_files if "~" not in file]
if not bg_valid_files:
    raise FileNotFoundError("no background spectra found in " + bg_path)

bg_data = np.zeros((len(bg_valid_files), 2048))

excel_app = xw.App(visible=False)
try:
    for i, file in enumerate(bg_valid_files):
        path = os.path.join(bg_path, file)

        # Open the Excel sheet with the spectrum. The workbook is only read, so it is
        # closed without saving to leave the raw data files untouched.
        wb = xw.Book(path)
        try:
            spectra = wb.sheets['Spectra']["C20"].options(np.array, expand='table').value
        finally:
            wb.close()

        spectrum_cpm = spectra/BG_CPM_DIVISOR
        bg_data[i, :] = spectrum_cpm # save it in CPM

        plt.plot(spectrum_cpm, "grey")

        #print(np.sum(spectra[400:601])/30) # sanity check: should be around 42
finally:
    excel_app.quit()  # always shut down the hidden Excel instance, even if a file fails

bg_spectrum = np.mean(bg_data, axis=0)
plt.title("Background averaging")
plt.ylabel("CPM")
plt.plot(bg_spectrum, "r")
plt.show()

## 2.2 Calculate the counts

In [148]:
def counts(file, peak_options="20-10", remove_background=True):
    """Sum the counts in the two peak windows for every spectrum in one Hidex export.

    Parameters
    ----------
    file : str
        Name of an Excel file inside ./data/low_activity_files/. The workbook must
        contain a sheet titled 'Spectra' (table starting at C20) and a sheet titled
        'Results' (timestamps in C27:C36).
    peak_options : {"20-10", "2FWHM", "1FWHM"}
        Window definition. "2FWHM" spans 511 +- FWHM_511 and 1022 +- FWHM_1022,
        "1FWHM" spans half of that on each side, and "20-10" uses the fixed channels
        409-613 and 920-1124 (20% around the main peak, 10% around the coincidence
        peak). FWHM_511 and FWHM_1022 are the module-level values computed above;
        the window edges are truncated to integers.
    remove_background : bool
        If True, subtract the module-level `bg_spectrum` from every spectrum first.

    Returns
    -------
    (peak_counts, counts_old, timestamps), or None when `peak_options` is not one of
    the recognised values. `counts_old` is the sum over channels 400-600 (inclusive),
    kept as a comparison with the old single-window data.
    """
    if peak_options == "2FWHM":
        p1_start, p1_end, p2_start, p2_end = int(511 - FWHM_511), int(511 + FWHM_511), int(1022 - FWHM_1022), int(1022 + FWHM_1022) #peak edges
    elif peak_options == "20-10":
        p1_start, p1_end, p2_start, p2_end = 409, 613, 920, 1124 # 20% around main peak, 10% around coincidence peak
    elif peak_options == "1FWHM":
        p1_start, p1_end, p2_start, p2_end = int(511 - FWHM_511/2), int(511 + FWHM_511/2), int(1022 - FWHM_1022/2), int(1022 + FWHM_1022/2)
    else:
        # Unknown option: still returns None as before, but now before any Excel I/O happens.
        return

    path = "./data/low_activity_files/" + file
    # Open the workbook once for both sheets (opening/closing Excel files is slow) and
    # close it without saving: it is only read, so the raw export stays untouched.
    wb = xw.Book(path)
    try:
        spectra = wb.sheets['Spectra']["C20"].options(np.array, expand='table').value
        # also collect the timestamps so that we don't get lost in the sauce
        timestamps = wb.sheets['Results']["C27:C36"].options(np.array, dtype='datetime64').value
    finally:
        wb.close()

    # remove background calculated above
    if remove_background:
        spectra = spectra - bg_spectrum

    # add up the counts in both peaks (window edges inclusive)
    peak_counts = np.sum(spectra[:, p1_start:p1_end+1], axis=1) + np.sum(spectra[:, p2_start:p2_end+1], axis=1)
    counts_old = np.sum(spectra[:, 400:601], axis=1) # sanity check against the old 400-600 window

    return peak_counts, counts_old, timestamps

# List the files directly inside the low-activity folder so the counts can be collected for all of them.
low_activity_dir = "./data/low_activity_files/"
if not os.path.isdir(low_activity_dir):
    # Fail loudly instead of silently leaving `files` undefined (or stale from an earlier run).
    raise FileNotFoundError("data folder not found: " + low_activity_dir)
root, dirs, files = next(os.walk(low_activity_dir))
files.sort()  # os.walk gives no ordering guarantee; sort so runs are reproducible

In [149]:
# Per-measurement results, accumulated over all files (one entry per vial per file).
counts_list = []
counts_2fwhm_list = []
counts_1fwhm_list = []
old_counts_list = []
timestamps_list = []

excel_app = xw.App(visible=False)
try:
    for f in files:
        if "~" in f: continue # disregards temporary files
        print(f)

        c, c_old, t = counts(f)
        c_2fwhm, _, _ = counts(f, peak_options="2FWHM")
        c_fwhm, _, _ = counts(f, peak_options="1FWHM")
        # TODO: implement this directly into the function since opening/closing excel files takes a long time

        counts_list.extend(c.tolist())
        counts_2fwhm_list.extend(c_2fwhm.tolist())
        counts_1fwhm_list.extend(c_fwhm.tolist())
        timestamps_list.extend(t)
        old_counts_list.extend(c_old.tolist())
finally:
    # Always shut down the hidden Excel instance, even if one of the files fails to load.
    excel_app.quit()

HidexAMG-Track_30min-035-20240502-082900-AutoExport.xlsx
HidexAMG-Track_30min-035-20240502-084445-AutoExport.xlsx
HidexAMG-Track_30min-035-20240502-090029-AutoExport.xlsx
HidexAMG-Track_30min-035-20240502-091613-AutoExport.xlsx
HidexAMG-Track_30min-035-20240502-093157-AutoExport.xlsx
HidexAMG-Track_30min-035-20240502-094742-AutoExport.xlsx
HidexAMG-Track_30min-035-20240502-100326-AutoExport.xlsx
HidexAMG-Track_30min-035-20240502-101910-AutoExport.xlsx
HidexAMG-Track_30min-035-20240502-103454-AutoExport.xlsx
HidexAMG-Track_30min-035-20240502-105040-AutoExport.xlsx
HidexAMG-Track_30min-035-20240502-110624-AutoExport.xlsx
HidexAMG-Track_30min-035-20240502-112209-AutoExport.xlsx
HidexAMG-Track_30min-035-20240502-113753-AutoExport.xlsx
HidexAMG-Track_30min-035-20240502-115338-AutoExport.xlsx
HidexAMG-Track_30min-035-20240502-120922-AutoExport.xlsx
HidexAMG-Track_30min-035-20240502-122509-AutoExport.xlsx
HidexAMG-Track_30min-035-20240502-124055-AutoExport.xlsx
HidexAMG-Track_30min-035-202405

## 3. Checking if the noise is reduced
Finally, we want to check whether the noise differs between the two methods (a single 400-600 keV window vs. separate windows around the two peaks).

In [150]:
# Sanity check: overlay the old single-window counts with the new two-peak counts.
series_to_plot = (
    (old_counts_list, "400-600 keV window"),
    (counts_list, "Two peaks, 20%-10% windows"),
    (counts_2fwhm_list, "Two peaks, 2 FWHM"),
)
for values, series_label in series_to_plot:
    plt.plot(timestamps_list, values, ".", label=series_label)

plt.xlabel("timestamp")
plt.ylabel("CPM")
plt.yscale("log")  # log scale so the decay shows up as a straight line
plt.grid()
plt.legend()
plt.show()

In [151]:
import scipy.stats  # a bare `import scipy` does not guarantee that the stats submodule is loaded

# pablo wants to fit linear functions to the log-scaled data, and compare the r-squared
# need to separate individual vials for this
# Seconds elapsed since the first timestamp; int64 so the result is the same on every platform.
timestamps = (np.array(timestamps_list).astype('datetime64[s]').astype('int64')
              - timestamps_list[0].astype('datetime64[s]').astype('int64'))

n = 10 # number of vials


def _r2_per_vial(times, values, n_vials):
    """Fit log(values) vs. times for each vial and return the list of R^2 values.

    Measurements are interleaved by vial, so vial ``i`` owns the entries
    ``i, i + n_vials, i + 2*n_vials, ...``. Each R^2 is also printed.
    """
    r2_values = []
    for vial in range(n_vials):
        x = times[vial::n_vials]
        y = np.log(values[vial::n_vials])
        r_squared = scipy.stats.linregress(x, y).rvalue**2
        print(r_squared)
        r2_values.append(r_squared)
    return r2_values


r2_old = _r2_per_vial(timestamps, old_counts_list, n)
print()
r2_2fwhm = _r2_per_vial(timestamps, counts_2fwhm_list, n)
print()
r2_2010 = _r2_per_vial(timestamps, counts_list, n)
print()
r2_1fwhm = _r2_per_vial(timestamps, counts_1fwhm_list, n)

0.9954611677827839
0.997678239500497
0.9981095800029982
0.9952707223516475
0.9979671228599021
0.9989345872345392
0.999049690777638
0.9985008623730792
0.9987502370039555
0.9993449942982765

0.9953670331986866
0.9985604568193478
0.9980906651670708
0.9946405768823385
0.9977633266383663
0.9987994885403814
0.9991527922538346
0.9985236420245028
0.999348669248496
0.9994166436597706

0.9951174511504445
0.9980593999424948
0.998522092418439
0.9957142870126072
0.9972891030799208
0.998911716236857
0.9989831360658483
0.9987166396217567
0.9992449589465527
0.9995598094486665

0.9949088557729504
0.9977276450148542
0.9968626795264035
0.9941916428060016
0.9970700262073903
0.9973555554941482
0.9985189910710028
0.998511278477066
0.9993211558100767
0.9989878777279052


In [152]:
# One marker per vial for each windowing method's R^2.
vials = np.linspace(1, 10, 10)
r2_series = (
    (r2_old, "400-600 keV window"),
    (r2_2fwhm, "Two peaks, 2 FWHM"),
    (r2_1fwhm, "Two peaks, 1 FWHM"),
    (r2_2010, "Two peaks, 20%-10% windows"),
)
for r2_values, method_label in r2_series:
    plt.plot(vials, r2_values, ".", label=method_label)

plt.xlabel("Vial")
plt.ylabel("R$^2$ value of linear fit")
plt.legend()
plt.show()