In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
from collections import defaultdict
from sklearn.neighbors import KDTree

# Load the Filopodyan result tables (Bodies, Coordinates, Filopodia)

In [None]:
def read_second_row_groups(csv_path, group_size):
    """Split line 2 of a results table into fixed-size groups of raw strings.

    Only the second line of the file is used: the first line is skipped and
    reading stops as soon as line 2 has been consumed.  The first
    comma-separated field of that line is dropped, and the remaining fields
    are cut into consecutive chunks of ``group_size`` values.  The last chunk
    is shorter when the field count is not a multiple of ``group_size``.

    Parameters
    ----------
    csv_path : str or Path
        Table to read (opened as UTF-8 text).
    group_size : int
        Number of fields that make up one group.

    Returns
    -------
    list of list of str
        One list per group, in file order; empty when the file has fewer
        than two lines.
    """
    with open(csv_path, encoding='utf-8') as table_file:
        for line_num, each_line in enumerate(table_file, start=1):
            if line_num == 2:
                line_values = each_line.strip().split(',')
                return [line_values[i:i + group_size]
                        for i in range(1, len(line_values), group_size)]
    return []


# data_dict[file_type][<parameter>_coordinates]     -> [[tip_x, tip_y], ...]
# data_dict[file_type][<parameter>_coordinates_all] -> raw 9-field groups
# data_dict[file_type][<parameter>_filopodia]       -> raw 13-field groups
# The three lists of one parameter are built in the same group order; the
# matching cell indexes all of them with the same position.
data_dict = defaultdict(dict)
folder = '/Users/wangweiying/Desktop/R6_41hAPF_tif/R6_41hAPF_ED6' # Folder that contained results tables of Filopodyan

# Get all csv files and their information
for each_file in Path(folder).rglob('*/*.csv'):
    if '__MACOSX' in str(each_file):
        # Skip archive residue.  (`continue` is required here: a bare `next`
        # is only a reference to the builtin and does nothing.)
        continue

    file_name = each_file.name
    # Third-from-last '_' token of the file name; used as the outer key.
    file_type = file_name.split('_')[-3]
    # Last '_' token of the parent folder name; prefixes the inner keys.
    file_parameter = each_file.parent.name.split('_')[-1]
    table_type = file_name.split('_')[-1].replace('.csv','')

    if table_type == 'Bodies':
        # Kept for interactive inspection; only the most recent table survives.
        Bodies = pd.read_csv(each_file)

    elif table_type == 'Coordinates':
        # 9 data per group
        coordinates_all_list = read_second_row_groups(each_file, 9)
        # Fields 3 and 4 of every group are the tip x / y used for matching.
        coordinates_list = [[float(group_values[3]), float(group_values[4])]
                            for group_values in coordinates_all_list]
        data_dict[file_type][file_parameter+'_coordinates'] = coordinates_list
        data_dict[file_type][file_parameter+'_coordinates_all'] = coordinates_all_list

    elif table_type == 'Filopodia':
        # 13 data per group
        filopodia_list = read_second_row_groups(each_file, 13)
        data_dict[file_type][file_parameter+'_filopodia'] = filopodia_list

# Read manual data

In [None]:
# Workbook with the manually annotated coordinates; only the
# 'Front and Front Filopodia' sheet is loaded.
filename = '/Users/wangweiying/Desktop/R6_41hAPF_tif/41hAPF_R6_Front_Heel_L-cell_Coordinates.xlsx'
human_marker = pd.read_excel(
    filename,
    sheet_name='Front and Front Filopodia',
)

In [None]:
def marker_number(cell):
    """Return ``cell`` converted to ``float``, or ``None`` if it is not numeric.

    Used to tell numeric marker rows apart from text rows of the manual
    annotation sheet without relying on a broad ``except`` around the whole
    row-processing code.
    """
    try:
        return float(cell)
    except (TypeError, ValueError):
        return None


# For every manually annotated point, find the nearest detected tip in each
# '<parameter>_coordinates' table of the current tag and write one line to
# result.csv:
#   7 match fields (tag, index, x, y, par, index, distance)
#   + 2 nearest-tip coordinates (c_x, c_y)
#   + the 9 raw fields of the matching coordinates group
#   + the 13 raw fields of the filopodia group at the same position.
with open('result.csv','w') as fw:
    # Header text is kept exactly as before: later cells read result.csv by
    # these column names.
    fw.write('tag,index,x,y,par,index,distance,c_x,c_y,T,BaseX,BaseY,TipX,TipY,' + \
             'body:base,base:tip,base:tip;body:base,base:tip;body:base,' + \
            'T,dT,BaseMean,BaseLocalBackground,BodyMean,ProjMean,TipMean,TipThMean,TipLocalBackground,Length,dL,DCTM,DCBM')
    fw.write('\n')

    flag = ''          # tag taken from the most recent text row
    tree_cache = {}    # (tag, table key) -> KDTree, built once per table

    for _, row in human_marker.iterrows():
        # .iloc: positional access; integer keys on a label-indexed Series
        # are deprecated in recent pandas.
        marker_cell = row.iloc[0]
        marker_value = marker_number(marker_cell)

        if marker_value is None:
            # Text row: a non-blank string starts a new tag section.
            if isinstance(marker_cell, str) and marker_cell.strip() != '':
                flag = marker_cell.strip()
            continue

        # Blank rows (NaN) and the marker numbered 1 are not matched.
        if np.isnan(marker_value) or marker_cell == 1:
            continue

        flag_index = marker_cell
        flag_x = float(row.iloc[2])
        flag_y = float(row.iloc[3])
        query_point = np.array([[flag_x, flag_y]])

        # .get() avoids inserting an empty entry into the defaultdict when the
        # tag has no detected tables.
        table_values = data_dict.get(flag, {})

        # Iterate through all loG's to find the nearest
        for key in table_values:
            key_parts = key.split('_')
            if key_parts[-1] != 'coordinates':
                continue

            tip_points = table_values[key]
            if len(tip_points) == 0:
                # Nothing detected for this parameter: a KDTree cannot be
                # built on an empty point set.
                continue

            parameter = key_parts[0]
            cache_key = (flag, key)
            if cache_key not in tree_cache:
                tree_cache[cache_key] = KDTree(np.array(tip_points))

            # Only the nearest neighbour is used, so k=1; this also works for
            # tables holding fewer than three points.
            dist, ind = tree_cache[cache_key].query(query_point, k=1)
            nearest = ind[0][0]

            result_list = [flag, flag_index, flag_x, flag_y, parameter,
                           nearest, dist[0][0]]
            result_list.extend(tip_points[nearest])
            result_list.extend(table_values[key + '_all'][nearest])
            result_list.extend(table_values[parameter + '_filopodia'][nearest])
            fw.write(','.join(str(value) for value in result_list))
            fw.write('\n')

# Plots (heatmap)

In [None]:
import seaborn as sns

# Load the match table from result.csv in the current working directory.
df = pd.read_csv('./result.csv')

In [None]:
# Show the column labels of the loaded match table.
df.columns

In [None]:
# Display the loaded match table.
df

In [None]:
# Independent copy of the three columns the plots need.
sub_df = df.loc[:, ['tag', 'par', 'distance']].copy()

In [None]:
def zero_pad_tag(tag):
    """Return 'T0' + remainder when exactly one character is left after
    removing every 'T' from ``tag``; otherwise return ``tag`` unchanged."""
    remainder = tag.replace('T', '')
    return 'T0' + remainder if len(remainder) == 1 else tag


# Rewrite the tag column with the padded labels.
new_tag_list = [zero_pad_tag(tag) for tag in sub_df['tag']]
sub_df['tag'] = new_tag_list

In [None]:
# Total distance for every (tag, par) pair, flattened back to plain columns.
distance_by_tag_par = sub_df.groupby(['tag', 'par'])['distance'].sum()
sub_df_group = distance_by_tag_par.reset_index()

In [None]:
# Wide table: one row per tag, one column per par, cells hold the distance.
distance_pivot = sub_df_group.pivot_table(
    index=["tag"],
    columns=['par'],
    values=["distance"],
)
pivot_sub_df = distance_pivot['distance']

In [None]:
import matplotlib.pyplot as plt

# Heatmap of the tag x par distance table, saved to ./heatmap.png.
# The figure is created explicitly at 20 x 8 inches instead of pushing
# 'figure.figsize' into the global rcParams through sns.set_context, so the
# size does not leak into figures drawn later in the notebook.
# Names are kept from the previous version of this cell: `fig` is the Axes
# that seaborn draws on, `scatter_fig` is the Figure that owns it.
scatter_fig, fig = plt.subplots(figsize=(20, 8))
sns.heatmap(data=pivot_sub_df, square=True, cmap='Blues', annot=True, ax=fig)
# Title set on the heatmap axes directly rather than on pyplot's current axes.
fig.set_title('R6_41hAPF')
scatter_fig.savefig('./heatmap.png', dpi=400)

# Total distance per parameter (line plot and box plot)

In [None]:
# One row per par with the sum of all its distances.
par_summary = (
    sub_df
    .groupby(['par'])
    .agg(sum_distance=('distance', 'sum'))
    .reset_index()
)

In [None]:
def par_to_number(par_label):
    """Remove every 'loG' from the label and convert the rest to float."""
    return float(par_label.replace('loG', ''))


# Numeric version of the par label, used as the x axis of the line plot.
par_summary['par_value'] = par_summary['par'].apply(par_to_number)

In [None]:
# The star import stays: later plotting cells use plotnine names from it.
from plotnine import *

# Line plot of the summed distance against the numeric par value,
# built up one component at a time and saved to ./line.png.
p1 = ggplot(par_summary, aes('par_value', 'sum_distance'))
for p1_component in (
    geom_line(colour='red'),
    geom_point(colour='blue'),
    xlab('log'),
    ylab('sum_distance'),
    theme(figure_size=(8, 4)),
    ggtitle('R6_41hAPF'),
):
    p1 = p1 + p1_component
p1.save('./line.png')

In [None]:
# Render the line plot as the cell output.
p1

In [None]:
# Box plot of the per-(tag, par) distance sums, one box per par,
# saved to ./boxplot.png.
p2 = ggplot(sub_df_group, aes('par', 'distance', fill='par'))
p2 = p2 + geom_boxplot()
p2 = p2 + xlab('log')
p2 = p2 + ylab('distance')
# NOTE(review): the 'FangSong' font family must be installed on the machine
# rendering the plot — confirm it is available where this notebook runs.
p2 = p2 + theme(figure_size=(8, 4), text=element_text(family='FangSong'))
p2 = p2 + ggtitle('R6_41hAPF')
p2.save('./boxplot.png')

In [None]:
# Render the box plot as the cell output.
p2