In [None]:
import numpy as np
import mayfly as mf
import h5py
import pandas as pd
import scipy
import matplotlib.pyplot as plt
import seaborn as sns
import os 
import sys
import json

# Root of the project tree. The original cluster location stays the default;
# set the MAYFLY_PROJECT_PATH environment variable to run the notebook from
# a different machine or checkout without editing this cell.
PATH = os.environ.get('MAYFLY_PROJECT_PATH', '/storage/home/adz6/group/project/')
# Directory the result files (*.h5) are read from.
RESULTPATH = os.path.join(PATH, 'results/mayfly')
# Directory the plt.savefig calls in this notebook write to.
PLOTPATH = os.path.join(PATH, 'plots/mayfly')

def ToDataFrame(h5file):
    """Flatten the ``'result'`` group of an open HDF5 file into a DataFrame.

    Every dataset in the group becomes one row. Each attribute of the dataset
    is copied into a column named after the attribute, and the dataset
    contents are read in full into the ``'grid'`` column.
    """
    result_group = h5file['result']

    records = []
    for name in result_group:
        dataset = result_group[name]
        record = {key: dataset.attrs[key] for key in dataset.attrs}
        record['grid'] = dataset[:]  # read the whole dataset into memory
        records.append(record)

    return pd.DataFrame(records)

def ExtractPeakCoordinates(data, threshold=2.2):
    """Locate the grid cells whose value exceeds a fixed threshold.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``'grid'`` column holding one array per row.
    threshold : float, optional
        Cells strictly greater than this value are reported as peaks.
        Defaults to 2.2, the value this function previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, with a ``'peaks'`` column added
        in place. Each entry is the ``np.argwhere`` result for that row's
        grid: one row of indices per cell above the threshold.
    """
    # Walk the column values directly. Using the index *labels* as positions
    # for .iloc only works for a default 0..n-1 index and raises (or reads
    # the wrong row) on a filtered or re-indexed frame.
    data['peaks'] = [np.argwhere(image > threshold) for image in data['grid']]
    return data

def ConvertCoordinatesToDataUnits(data, angles, energies):
    """Translate peak grid indices into template angle and energy values.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``'peaks'`` column of ``(n, 2)`` integer index arrays
        (as produced by ``np.argwhere``). Column 0 indexes ``angles`` and
        column 1 indexes ``energies``.
    angles, energies : array-like
        Axis values looked up by those indices.

    Returns
    -------
    pandas.DataFrame
        The same object, with ``'peak_x'`` (energies) and ``'peak_y'``
        (angles) columns added in place.
    """
    # Fancy indexing below needs ndarrays; plain lists are accepted too.
    angles = np.asarray(angles)
    energies = np.asarray(energies)

    peak_x = []
    peak_y = []
    # Iterate over the values themselves: index labels are not positions,
    # so label-driven .iloc access fails on a non-default index.
    for peak_coord in data['peaks']:
        peak_x.append(energies[peak_coord[:, 1]])
        peak_y.append(angles[peak_coord[:, 0]])

    data['peak_x'] = peak_x
    data['peak_y'] = peak_y

    return data

def ClusterPeaks(data, cut_height=0.2502):
    """Group each row's peaks into clusters and record the cluster centroids.

    For every row the ``(peak_x, peak_y)`` points are linked with SciPy's
    hierarchical clustering (default linkage method, Euclidean ``pdist``)
    and the resulting tree is cut at ``cut_height``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``'peak_x'`` and ``'peak_y'`` columns, each holding one
        1-D array of coordinates per row.
    cut_height : float, optional
        Height at which the linkage tree is cut. Defaults to 0.2502, the
        value this function previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        The same object, with two columns added in place:

        * ``'peak_clusters'`` - per row, a list with one array of member
          points per cluster. A cluster with a single member is stored as a
          flat ``(2,)`` array, larger clusters as ``(k, 2)``.
        * ``'peak_cluster_means'`` - per row, a ``(n_clusters, 2)`` array of
          ``[mean_x, mean_y]`` centroids.
    """
    clusters = []
    cluster_means = []

    for row in range(len(data.index)):
        xs = np.asarray(data['peak_x'].iloc[row]).ravel()
        ys = np.asarray(data['peak_y'].iloc[row]).ravel()
        points = np.column_stack((xs, ys))
        n_points = len(points)

        if n_points >= 2:
            z = scipy.cluster.hierarchy.linkage(
                scipy.spatial.distance.pdist(points), optimal_ordering=True
            )
            # cut_tree returns one column of labels per requested height;
            # column 1 corresponds to cut_height.
            labels = scipy.cluster.hierarchy.cut_tree(z, height=[0., cut_height])[:, 1]
        else:
            # pdist/linkage need at least two observations. A lone peak is
            # its own cluster, and a row without peaks has no clusters.
            labels = np.zeros(n_points, dtype=int)

        ith_cluster_list = []
        ith_cluster_mean_list = []
        for label in np.unique(labels):
            members = points[labels == label]
            ith_cluster_mean_list.append(
                [np.mean(members[:, 0]), np.mean(members[:, 1])]
            )
            # Keep the established output shape: single-member clusters are
            # stored flat, not as a (1, 2) array.
            ith_cluster_list.append(members[0] if len(members) == 1 else members)

        clusters.append(ith_cluster_list)
        # reshape keeps the (n_clusters, 2) layout even when there are none.
        cluster_means.append(np.array(ith_cluster_mean_list).reshape(-1, 2))

    data['peak_clusters'] = clusters
    data['peak_cluster_means'] = cluster_means

    return data

def FitFunc(x, a, b):
    """Straight-line model ``a + b * x``.

    ``x`` is the pitch angle measured from 90 degrees (x = 90 - theta),
    ``a`` is the intercept and ``b`` the slope.
    """
    slope_term = b * x
    return a + slope_term

def FitFuncInv(y, a, b):
    """Invert ``FitFunc``: return the ``x`` for which ``a + b * x == y``.

    The previous body returned ``sqrt((y - a) / b)``, which is the inverse
    of a quadratic ``a + b * x**2``. ``FitFunc`` is linear, so that did not
    round-trip and produced NaN whenever ``(y - a) / b`` was negative.
    """
    return (y - a) / b

def FitPolynomial(data, func, energy_offset=18590):
    """Fit ``func`` to the peak positions of every row.

    For each row, ``scipy.optimize.curve_fit`` is run with
    ``90 - peak_y`` as the independent variable and
    ``peak_x - energy_offset`` as the dependent variable.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``'peak_x'`` and ``'peak_y'`` columns holding one array
        of coordinates per row.
    func : callable
        Model passed to ``curve_fit``, called as ``func(x, *params)``.
        Earlier versions ignored this argument and always fitted the
        module-level ``FitFunc``.
    energy_offset : float, optional
        Value subtracted from ``peak_x`` before fitting. Defaults to 18590,
        the value this function previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        The same object, with ``'fit_param'`` (optimal parameters) and
        ``'fit_cov'`` (their covariance matrix) columns added in place.
    """
    opt_param_list = []
    opt_cov_list = []
    # Iterate over the values themselves: index labels are not positions,
    # so label-driven .iloc access fails on a filtered frame.
    for peak_x, peak_y in zip(data['peak_x'], data['peak_y']):
        x_data = np.asarray(peak_x)
        y_data = np.asarray(peak_y)
        popt, pcov = scipy.optimize.curve_fit(func, 90 - y_data, x_data - energy_offset)

        opt_param_list.append(popt)
        opt_cov_list.append(pcov)

    data['fit_param'] = opt_param_list
    data['fit_cov'] = opt_cov_list

    return data
        
        


# get data

In [None]:
# Show which result files are available in RESULTPATH.
os.listdir(RESULTPATH)

In [None]:
# Result file analysed throughout this notebook.
selected_result = '210721_off_axis_2cm_study_with_noise.h5'

result_list = os.listdir(RESULTPATH)
# Fail with a clear message if the file is absent, instead of hitting a
# NameError or silently reusing a stale value left in the kernel.
if selected_result not in result_list:
    raise FileNotFoundError(f'{selected_result} not found in {RESULTPATH}')

# The handle is left open on purpose: later cells read attributes from
# h5file['result'].
h5file = h5py.File(os.path.join(RESULTPATH, selected_result), 'r')

data = ToDataFrame(h5file)


In [None]:
# Template pitch angles stored as an attribute on the 'result' group.
h5file['result'].attrs['angles']

In [None]:
# Template energies stored as an attribute on the 'result' group.
h5file['result'].attrs['energies']

# extract coordinates of peak positions

In [None]:
# The template axes are stored as attributes on the 'result' group; both are
# sorted into ascending order before being used as lookup tables.
# NOTE(review): this assumes the grid rows/columns follow the same ascending
# order - confirm against the code that wrote the file.
result_attrs = h5file['result'].attrs
template_angles = np.sort(result_attrs['angles'])
template_energies = np.sort(result_attrs['energies'])

# Threshold every grid, then map the peak indices to (energy, angle) values.
data = ConvertCoordinatesToDataUnits(
    ExtractPeakCoordinates(data), template_angles, template_energies
)

In [None]:
# Mean grid value of one example signal, divided by 180.
# NOTE(review): the meaning of the divisor 180 is not stated anywhere in
# this notebook - confirm and name it.
example_mask = (data['signal-angle'] == 89.5) & (data['signal-energy'] == 18595.0)
example_grid = data[example_mask]['grid'].item()
print(np.mean(example_grid) / 180)

# plot example degeneracy curve

In [None]:
figname = '210714_energy_pitch_angle_degeneracies_example5'

# Select the single simulated signal to display.
test_data = data[(data['signal-angle'] == 89.5) & (data['signal-energy'] == 18597.25)]
# Peak positions in data units (not drawn below; available for overlays).
x_data = np.array(test_data['peak_x'].array[0])
y_data = np.array(test_data['peak_y'].array[0])
sig_E = test_data['signal-energy'].squeeze()
sig_a = test_data['signal-angle'].squeeze()
print(sig_E, sig_a)

sns.set_theme(context='talk', style='white')
sns.set_style('ticks')
cmap = sns.color_palette('mako', as_cmap=True)

fig, ax = plt.subplots(figsize=(8, 7))

# extent is (left, right, bottom, top): the y axis runs from
# template_angles[0] at the top to template_angles[-1] at the bottom.
img = ax.imshow(
    test_data['grid'].squeeze(),
    aspect='auto',
    extent=(template_energies[0], template_energies[-1], template_angles[-1], template_angles[0]),
    cmap=cmap,
    interpolation='none',
)
cbar = fig.colorbar(img, ax=ax, label='Matched Filter Score')
ax.set_xlabel('Template Energy (eV)')
ax.set_ylabel('Template Pitch Angle (deg)')

ax.text(18593, 88.25, rf'Signal: $\theta$ = {sig_a}, E = {sig_E}', size=14, color='w')

# Uncomment to write the figure to PLOTPATH.
#plt.savefig(os.path.join(PLOTPATH, figname + '.png'))

# isolate data where degeneracies are mostly linear

In [None]:
# .copy() makes this an independent frame, so the columns that ClusterPeaks
# adds to it later do not trigger pandas' SettingWithCopyWarning.
data_linear_regime = data[data['signal-angle'] <= 89.5].copy()

In [None]:
# Display the rows selected for the linear regime.
data_linear_regime

# use scipy clustering algorithm on the data

In [None]:
# Adds the 'peak_clusters' and 'peak_cluster_means' columns to the frame.
data_linear_regime = ClusterPeaks(data_linear_regime)

# plot an example to show the cluster positions

In [None]:
figname = '210714_clustering_energy_pitch_angle_degeneracies_example1'

test_data = data_linear_regime[(data_linear_regime['signal-angle'] == 88.0) & (data_linear_regime['signal-energy'] == 18595)]

# Cluster centroids: column 0 is energy, column 1 is pitch angle.
example_means = test_data['peak_cluster_means'].array[0]
x_data = np.array(example_means[:, 0])
y_data = np.array(example_means[:, 1])
sig_E = test_data['signal-energy'].squeeze()
sig_a = test_data['signal-angle'].squeeze()
print(sig_E, sig_a)

sns.set_theme(context='talk', style='white')
sns.set_style('ticks')
cmap = sns.color_palette('mako', as_cmap=True)

fig, ax = plt.subplots(figsize=(8, 7))

# extent is (left, right, bottom, top): the y axis runs from
# template_angles[0] at the top to template_angles[-1] at the bottom.
img = ax.imshow(
    test_data['grid'].squeeze(),
    aspect='auto',
    extent=(template_energies[0], template_energies[-1], template_angles[-1], template_angles[0]),
    cmap=cmap,
    interpolation='none',
)
cbar = fig.colorbar(img, ax=ax, label='Matched Filter Score')
ax.set_xlabel('Template Energy (eV)')
ax.set_ylabel('Template Pitch Angle (deg)')

# Draw the centroids on the image axes explicitly rather than relying on
# pyplot's notion of the "current" axes.
ax.plot(x_data, y_data, 'r.')

# Lay out once every artist is in place, then save.
fig.tight_layout()
os.makedirs(PLOTPATH, exist_ok=True)  # savefig does not create directories
fig.savefig(os.path.join(PLOTPATH, figname + '.png'))


# for each signal energy, plot the position of all clusters vs pitch angle

In [None]:
figname = '210714_degenerate_cluster_tracks_example_all_energies'

signal_energy = np.linspace(18590, 18600, 11)
sns.set_theme(context='talk', style='whitegrid')
fig, ax = plt.subplots(figsize=(13, 8))

# One series per signal energy, pooling the cluster centroids of every row
# simulated at that energy.
for test_energy in signal_energy:
    test_data = data_linear_regime[data_linear_regime['signal-energy'] == test_energy]

    centroid_arrays = list(test_data['peak_cluster_means'])
    cluster_x_pos = [x for means in centroid_arrays for x in means[:, 0]]
    cluster_y_pos = [y for means in centroid_arrays for y in means[:, 1]]

    # The label is attached to each series although no legend is drawn.
    ax.plot(cluster_x_pos, cluster_y_pos, '.', label=f'Energy = {test_energy}')

# Inverted y axis: 90 degrees at the bottom, 88 at the top.
ax.set_ylim(90, 88)
ax.set_xlabel('Template Energy (eV)')
ax.set_ylabel('Template Pitch Angle (deg)')

fig.savefig(os.path.join(PLOTPATH, figname + '.png'))

# for a single pitch angle, plot the position of all clusters vs energy

In [None]:
# NOTE(review): this file name contains a space; it is kept unchanged so
# that existing references to the saved figure stay valid.
figname = '210714_closely_spaced_parameters_appear distinguishable_example'

signal_pa = [89.1, 89.1125]
signal_energies = [18590, 18591]
sns.set_theme(context='talk', style='whitegrid')
fig, ax = plt.subplots(figsize=(13, 8))

for test_pa in signal_pa:
    for test_energy in signal_energies:
        test_data = data_linear_regime[(data_linear_regime['signal-angle'] == test_pa) & (data_linear_regime['signal-energy'] == test_energy)]

        cluster_x_pos = []
        cluster_y_pos = []
        for cluster_points in test_data['peak_cluster_means']:
            cluster_x_pos.extend(cluster_points[:, 0])
            cluster_y_pos.extend(cluster_points[:, 1])

        # Draw one line per (pitch angle, energy) pair, after all matching
        # rows have been collected. Plotting inside the row loop repeated
        # the line and its legend entry once per matching row. Pairs with
        # no matching row are skipped, as before.
        if len(test_data.index) > 0:
            ax.plot(cluster_x_pos, cluster_y_pos, '.-', label=f'Pitch Angle = {test_pa}, Energy = {test_energy}')

ax.set_xlabel('Template Energy (eV)')
ax.set_ylabel('Template Pitch Angle (deg)')
ax.legend(loc=0)

fig.savefig(os.path.join(PLOTPATH, figname + '.png'))

# fit a curve to four closely spaced signals and compare fits

In [None]:
# NOTE(review): this file name contains a space; it is kept unchanged so
# that existing references to the saved figure stay valid.
figname = '210714_comparing degenerate_points_shows_degeneracy_is_real'

# signal_pa[k] is paired with signal_energies[k]; each pair is one signal.
signal_pa = [89.4, 89.35, 89.3625, 89.3875]
signal_energies = [18595, 18600, 18599, 18596]
sns.set_theme(context='talk', style='whitegrid')
fig, ax = plt.subplots(figsize=(13, 8))

x_pos_list = []
y_pos_list = []

for test_pa, test_energy in zip(signal_pa, signal_energies):
    test_data = data_linear_regime[(data_linear_regime['signal-angle'] == test_pa) & (data_linear_regime['signal-energy'] == test_energy)]

    # Pool the cluster centroids of every row matching this signal.
    cluster_x_pos = []
    cluster_y_pos = []
    for cluster_points in test_data['peak_cluster_means']:
        cluster_x_pos.extend(cluster_points[:, 0])
        cluster_y_pos.extend(cluster_points[:, 1])

    x_pos_list.append(cluster_x_pos)
    y_pos_list.append(cluster_y_pos)

# Centroid coordinates of the first signal, printed for inspection.
print(list(zip(x_pos_list[0], y_pos_list[0])))

for pa, energy, xs, ys in zip(signal_pa, signal_energies, x_pos_list, y_pos_list):
    ax.plot(xs, ys, '.-', markersize=16, label=f'Pitch Angle = {pa}, Energy = {energy}')

ax.set_xlabel('Template Energy (eV)')
ax.set_ylabel('Template Pitch Angle (deg)')
ax.legend(loc=0)

fig.savefig(os.path.join(PLOTPATH, figname + '.png'))

# scatter plot of best fit parameters for each test signal