In [None]:
from scipy import stats, special, interpolate
import numpy as np
from pathlib import Path
import h5py
import matplotlib.pyplot as plt
import seaborn as sns
import awkward as ak
import pandas as pd
import math
from sklearn import metrics

def estimate_FFT():
    """Estimate per-signal CDFs and PDFs from the FFT peaks of test signals.

    Reads the hard-coded HDF5 test file, keeps the signals whose metadata
    fall inside the pitch/radius window defined below, Fourier-transforms
    the first ``n_sample`` samples of each, drops all-zero spectra, and
    passes the above-threshold peak magnitudes to ``generate_signal_cdf``.

    Returns:
        tuple: ``(cdf, pdf)``. ``cdf`` is the array returned by
        ``generate_signal_cdf`` (one row per retained spectrum, one column
        per point of the evaluation grid). ``pdf`` is the gradient of
        ``cdf`` along the last axis, rescaled so each row integrates to 1
        over the grid.
    """
    path2signals = Path.home()/'group'/'project'/'datasets'/'data'
    file = '220901_dl_test_data_85to90deg_5mm.h5'

    pitch_min = 86.0
    pitch_max = 88.6
    radius_min = 0.005
    radius_max = 0.005

    # Single source of truth for the record length: used both for the slice
    # that is Fourier-transformed and for the noise variance below.
    n_sample = 8192

    # The context manager guarantees the file handle is released even if
    # reading or the FFT raises.
    with h5py.File(path2signals/file, 'r') as h5file:
        metadata = pd.DataFrame({
            'energy': h5file['meta']['energy'][:],
            'pitch': h5file['meta']['theta_min'][:],
            'radius': h5file['meta']['x_min'][:],
        })

        selection = (
            (metadata['pitch'] >= pitch_min) & (metadata['pitch'] <= pitch_max)
            & (metadata['radius'] >= radius_min) & (metadata['radius'] <= radius_max)
        )
        inds = np.array(metadata[selection].index.array)

        signal_freq_spectra = np.fft.fftshift(np.fft.fft(
            h5file['x'][inds, 0:n_sample], axis=-1, norm='forward'))

    # Drop spectra that are identically zero. A boolean row mask keeps the
    # array 2-D even when exactly one spectrum survives; the previous
    # argwhere(...).squeeze() collapsed to a scalar index in that case and
    # turned the result into a 1-D array.
    nonzero_mask = abs(signal_freq_spectra).sum(-1) > 0
    signal_freq_spectra = signal_freq_spectra[nonzero_mask, :]

    peak_threshold = 1e-8
    system_noise_temp = 10
    # NOTE(review): 1.38e-23 looks like the Boltzmann constant; the factors
    # 50, 60 and 205e6 are presumably impedance, channel count and sample
    # rate -- confirm against the experiment configuration.
    noise_variance = 1.38e-23 * 50 * 60 * 205e6 * system_noise_temp / n_sample
    n_pts_dist = 101
    x = np.linspace(0, 1e-6, n_pts_dist)

    peaks = get_spectra_peaks(signal_freq_spectra, peak_threshold)
    cdf = generate_signal_cdf(peaks, x, noise_variance, n_sample)

    # The gradient uses unit spacing; the absolute scale is irrelevant
    # because every row is renormalized to unit area over x right after.
    pdf = np.gradient(cdf, axis=-1)
    norm = 1/np.trapz(pdf, x)
    pdf = norm[:, np.newaxis] * pdf

    return cdf, pdf

def get_spectra_peaks(freq_spectra, threshold):
    """Collect, per spectrum, the magnitudes that exceed ``threshold``.

    Args:
        freq_spectra: 2-D array indexed as ``[spectrum, frequency_bin]``.
        threshold: magnitudes strictly greater than this value are kept.

    Returns:
        ak.Array: jagged array with one list of peak magnitudes per row of
        ``freq_spectra``, in bin order.
    """
    def _row_peaks(row):
        # Magnitude of every bin, then keep only those above the threshold.
        magnitude = abs(row)
        return list(magnitude[magnitude > threshold])

    n_spectra = freq_spectra.shape[0]
    return ak.Array([_row_peaks(freq_spectra[k, :]) for k in range(n_spectra)])

def generate_signal_cdf(peaks, x, var, n_sample, batchsize=1000):
    """Build one combined CDF per spectrum, evaluated at the points ``x``.

    Spectra are processed in batches. For each batch the peaks of all its
    spectra are flattened, ``rician_pdf`` is evaluated for every peak, each
    resulting PDF is normalized to unit area over ``x``, converted with
    ``cdf_from_pdf``, merged per spectrum by ``combine_batch_cdf`` and
    finally multiplied by ``background_cdf_awk``.

    Args:
        peaks: jagged awkward array with one list of peak values per
            spectrum.
        x: NumPy array of evaluation points; its size sets the number of
            columns of the result.
        var: noise variance, forwarded to ``rician_pdf`` and
            ``background_cdf_awk``.
        n_sample: forwarded to ``background_cdf_awk``.
        batchsize: nominal number of spectra per batch. The number of
            batches is ``num_spectra // batchsize`` (at least one), so a
            batch may hold more than ``batchsize`` spectra.

    Returns:
        numpy.ndarray: array of shape ``(num_spectra, x.size)``.
    """
    num_spectra = int(ak.num(peaks, axis=0))
    combined_cdf = np.zeros((num_spectra, x.size))

    # Nothing to do for an empty input; return the (0, x.size) array.
    if num_spectra == 0:
        return combined_cdf

    # np.array_split raises ValueError for zero sections, which happened
    # whenever num_spectra < batchsize. Clamp to a single batch instead.
    n_batches = max(1, num_spectra // batchsize)
    spectra_inds = np.arange(0, num_spectra, 1)

    for i, spectra_batch in enumerate(np.array_split(spectra_inds, n_batches)):

        batch_peaks = peaks[spectra_batch]
        # Peaks per spectrum, needed to regroup the flattened per-peak CDFs.
        batch_counts = ak.num(batch_peaks, axis=1)
        flat_batch_peaks = ak.to_numpy(ak.flatten(batch_peaks))

        # One PDF row per peak, each normalized to unit area over x.
        batch_pdf = rician_pdf(x, var, flat_batch_peaks)
        batch_norm = 1/np.trapz(batch_pdf, x, axis=-1)
        batch_pdf = batch_norm[:, np.newaxis] * batch_pdf

        batch_cdf = cdf_from_pdf(x, batch_pdf)
        batch_cdf = combine_batch_cdf(batch_cdf, batch_counts)

        combined_cdf[spectra_batch, :] = batch_cdf * background_cdf_awk(x, batch_counts, n_sample, var)

        # Progress report every tenth batch.
        if i % 10 == 9:
            print(f'{i + 1} / {n_batches}')

    return combined_cdf