# Peak detection and feature measurement

---

In [None]:
import os
import sys
import glob
import logging
import yaml
import time

import matplotlib.pyplot as plt
import plotly.express as px

import numpy as np
import numpy.polynomial.polynomial as poly
import pandas as pd

import scipy
from scipy import signal
from scipy import stats
from scipy import ndimage

from sklearn import preprocessing

## Preprocessing & parameters

#### Custom func

In [None]:
def norm_prof_to_arr(input_df):
    """ Split a components data frame into per-component profiles and min-max scale them

    For every unique value of the 'comp' column (in order of first appearance)
    the 'profile_raw' and 'profile_ddf' values are extracted, and the raw
    profile is additionally rescaled with sklearn `minmax_scale`.

    Return:
        - np array built from the list of [raw_prof, df_prof, norm_prof] triplets,
          one triplet per component (a regular 3D array only if all profiles
          have the same length)
        - dict ({component:raw_prof})

    """
    stacked_profiles = []
    raw_by_comp = {}
    for comp_id in np.array(input_df.comp.unique()):
        # rows of the current component, shared by both profile columns
        comp_rows = input_df['comp'] == comp_id
        raw_prof = np.array(input_df.loc[comp_rows, 'profile_raw'])
        ddf_prof = np.array(input_df.loc[comp_rows, 'profile_ddf'])
        scaled_prof = preprocessing.minmax_scale(raw_prof)

        stacked_profiles.append([raw_prof, ddf_prof, scaled_prof])
        raw_by_comp[comp_id] = raw_prof
    return np.asarray(stacked_profiles), raw_by_comp

def ctrl_plot(input_df):
    """ Show an interactive plotly line plot of the ΔF/F profiles, one animation frame per component

    The y axis range is fixed to the global min/max of the 'profile_ddf' column,
    so the scale stays the same while switching between components.

    Args:
        input_df: data frame with 'time', 'profile_ddf' and 'comp' columns

    """
    ddf_values = input_df['profile_ddf']
    fig = px.line(input_df,
                  y='profile_ddf',
                  x='time',
                  color='comp',
                  animation_frame="comp",
                  title='Individual components ΔF/F profiles')
    # Series.min()/max() are vectorized and skip NaN values; the builtin min()/max()
    # iterate in Python and return an order-dependent result if a NaN is present
    fig.update(layout_yaxis_range=[ddf_values.min(), ddf_values.max()])
    fig.update_layout(showlegend=False)
    fig.show()

def prof_peaks_plot(input_profile, peaks_i, peaks_prop, x_time=None, add_profile=None):
    """ SciPy find_peaks results for individual profile plotting,
    for 0-1 range scaling profile only

    Draws the profile, peak markers ('x'), peak base markers ('.'),
    prominence extents (vertical lines), width lines at 'width_heights'
    (horizontal lines), a dashed reference line at 0.5 with the area above it
    shaded, and the area between the bases of every peak.

    Args:
        input_profile: 1D numpy array with profile values
        peaks_i: indices of the peaks in input_profile
        peaks_prop: dict with 'prominences', 'left_bases', 'right_bases',
                    'width_heights', 'left_ips' and 'right_ips' entries
        x_time: optional numpy array of x values, same length as input_profile,
                sample indices are used if omitted
        add_profile: optional additional profile plotted over the same x values

    """
    # features calc
    if x_time is None:
        # sample indices 0..n-1, so that x_time[i] == i
        # (linspace(0, n, n) has a step of n/(n-1) and drifts from the indices)
        x_time = np.arange(input_profile.shape[0])
    peak_vals = input_profile[peaks_i]
    prom_base = peak_vals - peaks_prop['prominences']
    left_bases = np.asarray(peaks_prop['left_bases'], dtype=int)
    right_bases = np.asarray(peaks_prop['right_bases'], dtype=int)
    base_idx = np.sort(np.concatenate([left_bases, right_bases]))
    width_y = peaks_prop['width_heights']
    # interpolated width borders are truncated to whole sample indices
    width_l = x_time[np.asarray(peaks_prop['left_ips'], dtype=int)]
    width_r = x_time[np.asarray(peaks_prop['right_ips'], dtype=int)]

    # plotting
    plt.figure(figsize=(20, 8))
    plt.plot(x_time, input_profile)
    plt.plot(x_time[peaks_i], peak_vals, 'x', color='red')
    plt.plot(x_time[base_idx], input_profile[base_idx], '.', color='red')
    plt.vlines(x=x_time[peaks_i], ymin=prom_base, ymax=peak_vals, color='red')
    plt.hlines(y=width_y, xmin=width_l, xmax=width_r, color='red')
    plt.hlines(y=.5, xmin=0, xmax=np.max(x_time), linestyles='--', color='k')
    plt.fill_between(x=x_time,
                     y1=input_profile,
                     y2=.5,
                     color='y',
                     alpha=.2,
                     where=input_profile >= .5)
    # area under the profile between the bases of each peak
    for base_l, base_r in zip(left_bases, right_bases):
        plt.fill_between(x=x_time[base_l:base_r],
                         y1=input_profile[base_l:base_r],
                         color="red",
                         alpha=0.2)
    if add_profile is not None:
        plt.plot(x_time, add_profile, color='g', alpha=.25)
    plt.show()

def peaks_prop_plot(input_profile, peaks_i, peaks_prop, x_time=None, add_profile=None, save_path=None):
    """ SciPy find_peaks results for individual profile plotting,
    for df/F profiles

    Draws the profile, peak markers ('x'), width border markers ('.'),
    prominence extents (vertical lines), width lines at 'width_heights'
    (horizontal lines) and the area between the profile and the width line
    of every peak.

    Args:
        input_profile: 1D numpy array with profile values
        peaks_i: indices of the peaks in input_profile
        peaks_prop: dict with 'prominences', 'width_heights', 'left_ips'
                    and 'right_ips' entries
        x_time: optional numpy array of x values, same length as input_profile,
                sample indices are used if omitted
        add_profile: optional additional profile plotted over the same x values
        save_path: optional image file path, the figure is saved with dpi=300 if given

    """
    # features calc
    if x_time is None:
        # sample indices 0..n-1, so that x_time[i] == i
        # (linspace(0, n, n) has a step of n/(n-1) and drifts from the indices)
        x_time = np.arange(input_profile.shape[0])
    peak_vals = input_profile[peaks_i]
    prom_base = peak_vals - peaks_prop['prominences']
    # interpolated width borders are truncated to whole sample indices
    left_idx = np.asarray(peaks_prop['left_ips'], dtype=int)
    right_idx = np.asarray(peaks_prop['right_ips'], dtype=int)
    border_idx = np.sort(np.concatenate([left_idx, right_idx]))
    width_y = peaks_prop['width_heights']

    # plotting
    plt.figure(figsize=(20, 8))
    plt.plot(x_time, input_profile)
    plt.plot(x_time[peaks_i], peak_vals, 'x', color='red')
    plt.plot(x_time[border_idx], input_profile[border_idx], '.', color='red')
    plt.vlines(x=x_time[peaks_i], ymin=prom_base, ymax=peak_vals, color='red')
    plt.hlines(y=width_y, xmin=x_time[left_idx], xmax=x_time[right_idx], color='red')
    # area between the profile and the width line of each peak
    for border_l, border_r, level in zip(left_idx, right_idx, width_y):
        plt.fill_between(x=x_time[border_l:border_r],
                         y1=input_profile[border_l:border_r],
                         y2=level,
                         color="red",
                         alpha=0.2)
    if add_profile is not None:
        plt.plot(x_time, add_profile, color='g', alpha=.25)
    plt.tight_layout()
    if save_path:
        plt.savefig(save_path, dpi=300)
    plt.show()

def cascade_plot(input_df, time, line_dict, y_shift=0.5, save_path=None):
    """ Plot the ΔF/F profiles of all components as a vertical cascade with application and scale bars

    Args:
        input_df: data frame with 'comp' and 'profile_ddf' columns
        time: x values shared by all profiles
        line_dict: {name: x values} of the application bars, the 'ctrl' entry is skipped
        y_shift: vertical offset between two consecutive profiles
        save_path: optional directory, 'components_profile_selected.png' is saved there if given

    """
    roi_ids = np.array(input_df.comp.unique())
    comp_col = input_df['comp']
    ddf_col = input_df['profile_ddf']

    # horizontal scale bar length in x units
    # NOTE(review): the title announces a 200 s bar, while the length is
    # 200 * max(time)/len(time) - confirm this matches the intended scale
    x_bar_len = int(200 * (max(time)/len(time)))

    plt.figure(figsize=(20, 8))

    # profiles plotting, every next profile is moved down by y_shift
    offset = 0
    for roi in roi_ids:
        roi_trace = np.array(ddf_col[comp_col == roi])
        plt.plot(time, roi_trace+offset, alpha=.5, label=f'ROI {roi}')
        offset -= y_shift

    # bars plotting, all bars are placed relative to the maximum of the first profile
    top_val = np.array(ddf_col[comp_col == roi_ids[0]]).max()
    scale_top = top_val+0.15

    for bar_name, bar_x in line_dict.items():
        if bar_name != 'ctrl':
            plt.plot(bar_x, [top_val+0.05] * len(bar_x), label=bar_name, linewidth=4)

    # vertical scale bar
    plt.vlines(x=[-10], ymin=[top_val-(1+0.15)], ymax=[scale_top], linewidth=3, color='k')

    # horizontal scale bar
    plt.hlines(y=[scale_top], xmin=[-10], xmax=[x_bar_len-10], linewidth=3, color='k')

    plt.title('x bar - 200s, y bar - 100% ΔF/F', loc='left')
    plt.axis('off')
    plt.legend(loc=1)
    plt.tight_layout()
    if save_path:
        plt.savefig(f'{save_path}/components_profile_selected.png', dpi=300)
    plt.show()

#### Data & sample global parameters loading

In [None]:
samp_name = 'E0003'
# sample directory: first sys.path entry with every 'glia' substring removed,
# followed by data_glia/<sample name>
samp_path = os.path.join(sys.path[0].replace('glia', ''), 'data_glia', samp_name)


# components table (CSV) -> data frame and stacked profiles array
total_df = pd.read_csv(f'{samp_path}/{samp_name}_components_df.csv')
total_arr, _ = norm_prof_to_arr(total_df)
print(total_df.head())

# sample metadata (YAML)
with open(f'{samp_path}/{samp_name}_meta.yaml') as f:
    samp_meta = yaml.safe_load(f)


# time axis: total registration time (meta file) spread over the recorded frames
total_reg_t = samp_meta['Reg_time']
last_frame_num = max(total_df['frame_num'])
frame_time = total_reg_t / last_frame_num
time_line = np.linspace(0, total_reg_t, num=last_frame_num+1)

# treatments (meta file): {event type: start time}, event time is divided by 1000
# (presumably ms -> s, verify against the meta file format)
treatment_dict = {event['Type']: event['Time']/1000 for event in samp_meta['Events']}
keys = list(treatment_dict)
vals = [*treatment_dict.values(), total_reg_t]
# every treatment lasts until the start of the next one, the last one until the end of registration
treatment_dict = {name: [t_on, t_off] for name, t_on, t_off in zip(keys, vals, vals[1:])}
# control interval: from zero to the start of the first treatment
treatment_dict['ctrl'] = [0., list(treatment_dict.values())[0][0]]
# for k,v in treatment_dict.items():  # app time print
#     print(k, ':', np.around(v, 1))
application_lines_dict = {name: np.linspace(t_lim[0], t_lim[1]) for name, t_lim in treatment_dict.items()}

# peaks detection parameters (meta file)
peaks_det_meta = samp_meta['Peaks_det']
bad_prof_list = list(peaks_det_meta['Bad_prof'])
print(f'Bad profiles: {bad_prof_list}')

# control view of all profiles, then the bad ones are excluded
ctrl_plot(total_df)
total_df = total_df[~total_df['comp'].isin(bad_prof_list)]

cascade_plot(total_df, time_line, application_lines_dict, save_path=samp_path)

## Peaks detection

In [None]:
print(peaks_det_meta)
# detection options (seconds from the meta file are converted to frames)
min_distance_sec = peaks_det_meta['Min_dist_sec']
# the conversion truncates, the distance is kept at one frame at least
min_distance_frames = max(int(min_distance_sec / frame_time), 1)

# duration options
width_sec = peaks_det_meta['Width_sec']  # [min, max]
width_frames = np.asanyarray([width_sec[0]/frame_time, width_sec[1]/frame_time], dtype=int)

# prominence calc window option
wlen_sec = peaks_det_meta['Wlen_sec']
wlen_frames = int(wlen_sec / frame_time)


# profiles img saving path
img_path = f'{samp_path}/peaks_prop'
os.makedirs(img_path, exist_ok=True)

# peaks features data frame columns
pf_columns = ['sample',     # sample name
              'comp',       # spatial component ID
              'app_group',  # application group, based on meta file
              'peak_i',     # peak index in profile, frames
              'peak_time',  # peak time, sec
              'rise',       # rise time, sec
              'decay',      # decay time, sec
              'FWHM',       # full width at half maximum, sec
              'amp_abs',    # absolute amplitude, a.u.
              'AUC_abs',    # area under a curve in rise-decay window, a.u.
              'amp_dF',     # amplitude, ΔF/F
              'AUC_dF']     # area under a curve in rise-decay window, ΔF/F

# rows are collected in a list and the data frame is built once after the loops,
# instead of a pd.concat call per peak (quadratic copying)
pf_rows = []

# loop over all profiles
components_ID = np.array(total_df.comp.unique())
for component in components_ID:
    comp_mask = total_df['comp'] == component
    det_prof = np.asarray(total_df['profile_ddf'][comp_mask])
    raw_prof = np.asarray(total_df['profile_raw'][comp_mask])

    # peaks detection, height and prominence thresholds are fractions
    # of the profile maximum and of the profile range respectively
    peaks, properties = signal.find_peaks(det_prof,
                                          height=np.max(det_prof)*peaks_det_meta['Heigh_min_proc'],
                                          threshold=None,
                                          distance=min_distance_frames,
                                          wlen=wlen_frames,
                                          prominence=(np.max(det_prof)-np.min(det_prof))*peaks_det_meta['Prom_min_proc'],
                                          rel_height=0.95,
                                          width=width_frames)

    # FWHM calc (peak_widths default rel_height=0.5), widths are returned in samples
    fwhm_properties = signal.peak_widths(det_prof, peaks, wlen=wlen_frames)

    # profile peaks plot
    peaks_prop_plot(input_profile=det_prof, peaks_i=peaks, peaks_prop=properties, x_time=time_line,
                    add_profile=None, save_path=f'{img_path}/ROI{component}_prof_prop.png')

    # loop over app peaks
    for peak_num, peak_index in enumerate(peaks):
        peak_time = time_line[peak_index]

        # treatment group, reset for every peak: a peak outside of all intervals
        # gets None instead of the group of the previous peak (or a NameError)
        app_group = None
        for treatment in treatment_dict:
            treatment_time = treatment_dict[treatment]
            if treatment_time[0] <= peak_time < treatment_time[1]:
                app_group = treatment

        # time features section, frames are converted to seconds
        left_base = properties['left_bases'][peak_num]
        right_base = properties['right_bases'][peak_num]
        rise = (peak_index - left_base) * frame_time
        decay = (right_base - peak_index) * frame_time
        # FWHM is converted to seconds like rise and decay
        fwhm = fwhm_properties[0][peak_num] * frame_time

        # amplitude values
        amp_abs = raw_prof[peak_index]
        amp_dF = det_prof[peak_index]

        # signal integral section: sum of the samples between the peak bases
        # (left base included, right base excluded)
        peak_bool_mask = np.zeros_like(det_prof, dtype=bool)
        peak_bool_mask[left_base:right_base] = True
        auc_abs = np.sum(raw_prof, where=peak_bool_mask)
        auc_dF = np.sum(det_prof, where=peak_bool_mask)

        # peak features row
        pf_rows.append({'sample': samp_name,
                        'comp': component,
                        'app_group': app_group,
                        'peak_i': peak_index,
                        'peak_time': peak_time,
                        'rise': rise,
                        'decay': decay,
                        'FWHM': fwhm,
                        'amp_abs': amp_abs,
                        'AUC_abs': auc_abs,
                        'amp_dF': amp_dF,
                        'AUC_dF': auc_dF})

# peaks features data frame
pf_df = pd.DataFrame(pf_rows, columns=pf_columns)

print(pf_df)
pf_df.to_csv(f'{samp_path}/peaks_properties_df.csv')
