In [None]:
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
from IPython.display import display

from lib.constants import *
from lib.nodelink_viewer import *
import lib.VIS_L23_preprocessing.vis_L23_constants as VIS
from lib.pandas_compute import *
from lib.matrix_analyzer import *
from lib.multilevel_analysis import *
from lib.pandas_impl import *
from lib.pandas_stats_VIS import *

from lib.util_plot import *

In [None]:
# Folder of the evaluation run analysed in this notebook, and its plot output folders.
eval_folder = Path.cwd().joinpath('data', 'eval', 'VIS_24-12-17_25mu')
plot_folder = eval_folder.joinpath('plots')
plot_folder_nodelink = plot_folder.joinpath('nodelink')
# parents=True also creates plot_folder; exist_ok=True keeps re-runs from failing.
plot_folder_nodelink.mkdir(parents=True, exist_ok=True)

# Presynaptic neurons with fewer summed empirical synapses than this are dropped further below.
min_num_synapses = 3

#### Data preparation

In [None]:
# NOTE: unpickling executes code stored in the file; only load results written by this
# project's own pipeline. `pickle` is not imported explicitly in the import cell --
# presumably it is provided by one of the star imports (TODO confirm).
filename = eval_folder / "multilevel_analysis.pkl"
with filename.open('rb') as file:
    multilevel_analysis = pickle.load(file)

# Flatten whatever index the stored summary carries, then index it by the columns below.
summary_index_columns = [
    'pre_celltype', 'post_celltype', 'pre_id_mapped', 'post_id_mapped',
    'post_compartment', 'overlap_volume', 'post_celltype_merged',
    'pre_celltype_merged',
]
df_summary = multilevel_analysis.df_summary.reset_index().set_index(summary_index_columns)

In [None]:
# Work on a flat copy and keep only rows whose pre_id_mapped and post_id_mapped are non-negative.
df = df_summary.reset_index()
has_valid_ids = (df.pre_id_mapped >= 0) & (df.post_id_mapped >= 0)
df_filtered = df[has_valid_ids]

# Per group: the EMPIRICAL and MODEL_NULL columns are summed, the preference columns are averaged.
feature_aggregation = {EMPIRICAL : "sum", MODEL_NULL : "sum"}
for model_name in (MODEL_Pa, MODEL_PaS, MODEL_PaSb, MODEL_PSa):
    feature_aggregation[f"{model_name}_preference"] = "mean"

# Cellular level: one row per presynaptic neuron and merged postsynaptic cell type.
cellular_keys = ["pre_celltype", "pre_id_mapped", "post_celltype_merged"]
df_features_cellular = df_filtered.groupby(cellular_keys).agg(feature_aggregation)

# Subcellular level: additionally split by postsynaptic compartment.
subcellular_keys = cellular_keys + ["post_compartment"]
df_features_subcellular = df_filtered.groupby(subcellular_keys).agg(feature_aggregation)

In [None]:
# Sanity check: total of the EMPIRICAL column over all rows kept in df_filtered.
df_filtered[EMPIRICAL].sum()

In [None]:
# Sum of the EMPIRICAL column per presynaptic neuron, presynaptic cell type and postsynaptic compartment.
df_compartments = df_features_subcellular.groupby(["pre_id_mapped", "pre_celltype", "post_compartment"]).agg({EMPIRICAL:"sum"}).reset_index()
# Selects the rows with pre_celltype 20; the trailing semicolon suppresses the cell output,
# so this line currently shows nothing (remove the semicolon to inspect the rows).
df_compartments[df_compartments.pre_celltype == 20];

In [None]:
# Sum the EMPIRICAL column per presynaptic neuron and collect the ids that stay below
# min_num_synapses (despite the variable name, these neurons may have a few synapses).
df_tmp = df_features_cellular.groupby("pre_id_mapped").agg({EMPIRICAL : "sum"}).reset_index()
neurons_wo_synapses = df_tmp[df_tmp[EMPIRICAL] < min_num_synapses].pre_id_mapped.values


def _drop_presynaptic_neurons(df_features, excluded_pre_ids):
    """Return `df_features` as a flat frame without the rows of the excluded neurons.

    Accepts the frame either indexed by a level named "pre_id_mapped" (state after the
    groupby) or already flat (state after this cell ran once). This keeps the cell
    re-runnable: the previous version looked the ids up in the index only and therefore
    raised as soon as the cell was executed a second time.

    Parameters:
        df_features: feature table with "pre_id_mapped" as index level or as column.
        excluded_pre_ids: array-like of pre_id_mapped values whose rows are removed.

    Returns:
        DataFrame with a fresh RangeIndex and "pre_id_mapped" as a regular column.
    """
    if "pre_id_mapped" in df_features.index.names:
        df_features = df_features.reset_index()
    keep = ~df_features["pre_id_mapped"].isin(excluded_pre_ids)
    return df_features[keep].reset_index(drop=True)


df_features_cellular = _drop_presynaptic_neurons(df_features_cellular, neurons_wo_synapses)
df_features_subcellular = _drop_presynaptic_neurons(df_features_subcellular, neurons_wo_synapses)

#### Plot feature distributions

In [None]:
# Cell type 1 is treated as one class, every cell type > 1 as the other
# (the names below call them "exc" and "inh").
pre_is_exc = df_features_cellular.pre_celltype == 1
pre_is_inh = df_features_cellular.pre_celltype > 1
post_is_exc = df_features_cellular.post_celltype_merged == 1
post_is_inh = df_features_cellular.post_celltype_merged > 1

# One boolean row mask per (presynaptic class, postsynaptic class) combination.
mask_exc_exc = pre_is_exc & post_is_exc
mask_exc_inh = pre_is_exc & post_is_inh

mask_inh_exc = pre_is_inh & post_is_exc
mask_inh_inh = pre_is_inh & post_is_inh

In [None]:
# Summary statistics of the MODEL_Pa preference values for rows selected by mask_exc_exc.
df_features_cellular.loc[mask_exc_exc, f"{MODEL_Pa}_preference"].describe()

#### Build feature vectors

In [None]:
def build_features(df_cellular, df_subcellular):
    """Assemble one 5-dimensional preference feature vector per presynaptic neuron.

    Rows of the result follow the sorted unique ``pre_id_mapped`` values of
    ``df_cellular`` (cast to int). Every feature starts at 1 and keeps that value
    when no input row provides it.

    Feature columns of ``X``:
        0: ``{MODEL_Pa}_preference`` of rows with ``post_celltype_merged == VIS.EXC_INH[0]``
        1: ``{MODEL_Pa}_preference`` of rows with any other ``post_celltype_merged``
        2: ``{MODEL_PaS}_preference`` of rows with ``post_compartment == VIS.SOMA[0]``
        3: ``{MODEL_PaS}_preference`` of rows with ``post_compartment == VIS.DEND[0]``
        4: ``{MODEL_PaS}_preference`` of rows with ``post_compartment == VIS.AIS[0]``

    NOTE(review): when several rows map to the same (neuron, feature) slot, the last
    row wins. This happens in column 1 if there is more than one non-``EXC_INH[0]``
    merged cell type, and in columns 2-4 if ``df_subcellular`` holds one row per
    ``post_celltype_merged`` -- confirm that overwriting is intended.

    Parameters:
        df_cellular: flat DataFrame with the columns ``pre_id_mapped``, ``pre_celltype``,
            ``post_celltype_merged`` and ``{MODEL_Pa}_preference``.
        df_subcellular: flat DataFrame with the columns ``pre_id_mapped``,
            ``post_compartment`` and ``{MODEL_PaS}_preference``.

    Returns:
        y: float array of length n holding the ``pre_celltype`` of each neuron.
        X: float array of shape (n, 5) with the features listed above.
        labels_long: list of n strings ``"<VIS.CELLTYPE_LABELS_SHORT entry>\\n<pre id>"``.

    Raises:
        ValueError: if a row refers to a ``pre_id_mapped`` that is not among the ids of
            ``df_cellular``, or if ``post_compartment`` is none of SOMA / DEND / AIS.
    """
    pre_ids = np.unique(df_cellular.pre_id_mapped.values.astype(int)).tolist()
    # Constant-time id -> row lookup instead of a linear list search per input row.
    row_of_pre_id = {pre_id: row_idx for row_idx, pre_id in enumerate(pre_ids)}

    def row_index(pre_id):
        # Keep raising ValueError for unknown ids, as list.index() did before.
        try:
            return row_of_pre_id[pre_id]
        except KeyError:
            raise ValueError(f"pre_id_mapped {pre_id!r} does not occur in df_cellular") from None

    X = np.ones((len(pre_ids), 5))
    y = np.zeros(len(pre_ids))
    labels_long = ["" for _ in range(len(pre_ids))]

    # Iterate column-wise: unlike iterrows() this keeps each column's own dtype, so the
    # cell type used as label key is not silently converted to float.
    exc_type = VIS.EXC_INH[0]
    cellular_rows = zip(
        df_cellular["pre_id_mapped"],
        df_cellular["pre_celltype"],
        df_cellular["post_celltype_merged"],
        df_cellular[f"{MODEL_Pa}_preference"],
    )
    for pre_id, class_label, post_celltype, preference_value in cellular_rows:
        row_idx = row_index(pre_id)
        feature_idx = 0 if post_celltype == exc_type else 1
        X[row_idx, feature_idx] = preference_value

        y[row_idx] = class_label
        labels_long[row_idx] = VIS.CELLTYPE_LABELS_SHORT[class_label] + f"\n{int(pre_id)}"

    soma, dend, ais = VIS.SOMA[0], VIS.DEND[0], VIS.AIS[0]
    subcellular_rows = zip(
        df_subcellular["pre_id_mapped"],
        df_subcellular["post_compartment"],
        df_subcellular[f"{MODEL_PaS}_preference"],
    )
    for pre_id, compartment, preference_value in subcellular_rows:
        row_idx = row_index(pre_id)
        if compartment == soma:
            X[row_idx, 2] = preference_value
        elif compartment == dend:
            X[row_idx, 3] = preference_value
        elif compartment == ais:
            X[row_idx, 4] = preference_value
        else:
            raise ValueError(f"unexpected post_compartment {compartment!r} for pre_id_mapped {pre_id!r}")

    return y, X, labels_long


In [None]:
# Sorted unique ids of the presynaptic neurons whose cell type is listed in VIS.INH.
is_inh_pre = df_features_cellular.pre_celltype.isin(VIS.INH)
inh_pre_id_values = df_features_cellular.loc[is_inh_pre, "pre_id_mapped"].values
pre_ids = np.unique(inh_pre_id_values.astype(int)).tolist()
sorted(pre_ids)

In [None]:
from sklearn.preprocessing import StandardScaler

# Feature matrix, class labels and display labels for all presynaptic neurons.
y, X, labels_long = build_features(df_features_cellular, df_features_subcellular)

# Standardise each feature column with scikit-learn's StandardScaler.
scaler = StandardScaler()
X_scaled = scaler.fit(X).transform(X)

# Two-class labels: class 1 becomes 0, every class > 1 becomes 1.
is_class_one = y == 1
is_class_above_one = y > 1
y_binary = y.copy()
y_binary[is_class_one] = 0
y_binary[is_class_above_one] = 1

In [None]:
# Display the presynaptic ids selected above (cell types listed in VIS.INH).
pre_ids

### Table with specificity values

In [None]:
# Column domain of the matrices: every (post_celltype_merged, post_compartment) pair,
# first for VIS.EXC_INH[0], then for VIS.EXC_INH[1]; compartments in the order soma, dend, AIS.
post_domain = [
    (post_celltype, compartment)
    for post_celltype in (VIS.EXC_INH[0], VIS.EXC_INH[1])
    for compartment in (VIS.SOMA[0], VIS.DEND[0], VIS.AIS[0])
]

# Label lookups handed to the analyzer for the grouped columns.
value_labels = {
    "pre_celltype" : VIS.CELLTYPE_LABELS,
    "post_celltype_merged" : VIS.CELLTYPE_LABELS,
    "post_compartment" : VIS.COMPARTMENT_LABELS,
}

# Mean "model-PaS_preference" per presynaptic neuron (rows) and postsynaptic domain (columns);
# cells without data get the default value 1.
matrix_analyzer = ConnectomeMatrixAnalyzer(df_summary, plot_folder)
matrix_analyzer.set_selection(include_filter_and={"pre_id_mapped" : pre_ids})
matrix_analyzer.set_data_columns("model-PaS_preference")
matrix_analyzer.build_matrix(
    ["pre_celltype", "pre_id_mapped"],
    ["post_celltype_merged", "post_compartment"],
    row_domains=None,  # alternative: VIS.EXC_INH_UNKNOWN
    col_domains=post_domain,
    value_label_map=value_labels,
    aggregation_fn="mean",
    default_value=1,
)
matrix_analyzer.render_matrix(
    "VIS-INH-model-P-preference_all",
    colormap_name="coolwarm",
    vmin=-1,
    vmax=1,
    normalization_function=NormalizePreferenceValue(0, 8),
    col_separator_lines=True,
    row_separator_lines=True,
    high_res=False,
)

In [None]:
from matplotlib.colors import SymLogNorm

# Label lookups handed to the analyzer for the grouped columns.
value_labels = {
    "pre_celltype" : VIS.CELLTYPE_LABELS,
    "post_celltype_merged" : VIS.CELLTYPE_LABELS,
    "post_compartment" : VIS.COMPARTMENT_LABELS,
}

# Summed EMPIRICAL values per presynaptic neuron (rows) and postsynaptic domain (columns);
# cells without data get the default value 0.
matrix_analyzer = ConnectomeMatrixAnalyzer(df_summary, plot_folder)
matrix_analyzer.set_selection(include_filter_and={"pre_id_mapped" : pre_ids})
matrix_analyzer.set_data_columns(EMPIRICAL)
matrix_analyzer.build_matrix(
    ["pre_celltype", "pre_id_mapped"],
    ["post_celltype_merged", "post_compartment"],
    row_domains=None,  # alternative: VIS.EXC_INH_UNKNOWN
    col_domains=post_domain,
    value_label_map=value_labels,
    aggregation_fn="sum",
    default_value=0,
)
matrix_analyzer.render_matrix(
    "VIS-INH-empirical_all",
    colormap_name="binary",
    vmin=0,
    vmax=1000,
    normalization_function=SymLogNorm(0.999, 1, vmin=0),
    col_separator_lines=True,
    row_separator_lines=True,
    high_res=False,
)

In [None]:
# Label lookups handed to the analyzer for the grouped columns.
value_labels = {
    "pre_celltype" : VIS.CELLTYPE_LABELS,
    "post_celltype_merged" : VIS.CELLTYPE_LABELS,
    "post_compartment" : VIS.COMPARTMENT_LABELS,
}

# Summed MODEL_NULL values per presynaptic neuron (rows) and postsynaptic domain (columns);
# cells without data get the default value 0.
matrix_analyzer = ConnectomeMatrixAnalyzer(df_summary, plot_folder)
matrix_analyzer.set_selection(include_filter_and={"pre_id_mapped" : pre_ids})
matrix_analyzer.set_data_columns(MODEL_NULL)
matrix_analyzer.build_matrix(
    ["pre_celltype", "pre_id_mapped"],
    ["post_celltype_merged", "post_compartment"],
    row_domains=None,  # alternative: VIS.EXC_INH_UNKNOWN
    col_domains=post_domain,
    value_label_map=value_labels,
    aggregation_fn="sum",
    default_value=0,
)
matrix_analyzer.render_matrix(
    "VIS-INH-null_all",
    colormap_name="binary",
    vmin=0,
    vmax=1000,
    normalization_function=SymLogNorm(0.999, 1, vmin=0),
    col_separator_lines=True,
    row_separator_lines=True,
    high_res=False,
)

In [None]:
norm_fn = NormalizePreferenceValue(0, 8)
# NOTE(review): `matrix_analyzer` is whichever analyzer was built last. On a top-to-bottom
# run that is the MODEL_NULL matrix, not the "model-PaS_preference" matrix this export is
# named after -- confirm which matrix is intended and give it its own variable.
M_normed = norm_fn(matrix_analyzer.matrix)

mask_inh = y > 1
y_inh = y[mask_inh]
# NOTE(review): unscaled features here; the clustering cell later rebinds X_inh to scaled ones.
X_inh = X[mask_inh]

labels_long_inh = [labels_long[i] for i in range(len(labels_long)) if mask_inh[i]]

# `labels_long` is ordered like the ids of ALL presynaptic neurons inside build_features,
# whereas `pre_ids` only holds the VIS.INH neurons, so a position in `pre_ids` must not be
# used to index `labels_long`. Every long label has the form "<label>\n<pre id>", which
# allows an id -> label lookup that does not depend on any ordering.
label_by_pre_id = {}
for long_label in labels_long:
    label_text, _, id_text = long_label.rpartition("\n")
    if id_text:
        label_by_pre_id[int(id_text)] = label_text.split("\n")[0]


def get_label_csv(pre_id):
    """Return the plain-text cell type label of `pre_id` for the CSV export.

    The label is taken from `labels_long`; "$", "{" and "}" are removed and every "I"
    is replaced by "INH".

    Raises:
        KeyError: if `pre_id` has no entry in `labels_long`.
    """
    label = label_by_pre_id[int(pre_id)]
    return label.replace("$","").replace("I","INH").replace("{","").replace("}","")


# Rows are labelled by position, so a differing row count would mislabel or drop rows.
# NOTE(review): this also assumes the matrix rows are ordered like `pre_ids` (ascending id);
# the matrix is built with rows grouped by ["pre_celltype", "pre_id_mapped"] -- confirm.
if len(M_normed) != len(pre_ids):
    raise ValueError(f"matrix has {len(M_normed)} rows but {len(pre_ids)} presynaptic ids were selected")

with open(plot_folder / "subcellular_preference_by_cell.csv", "w") as f:
    f.write(",".join(["pre_celltype", "pre_id","exc_soma","exc_dend","exc_ais","inh_soma","inh_dend","inh_ais"]) + "\n")
    for row_idx, row in enumerate(M_normed):
        pre_id = pre_ids[row_idx]
        f.write(get_label_csv(pre_id) + "," + str(pre_id) + "," + ",".join([f'{x:.3f}' for x in row]) + "\n")

In [None]:
# Column buffers for a per-neuron export; nothing is appended to them in this cell.
csv_ids, csv_labels = [], []
csv_exc_soma, csv_exc_dend, csv_exc_ais = [], [], []
csv_inh_soma, csv_inh_dend, csv_inh_ais = [], [], []

# Exploratory check: only the first presynaptic id is inspected.
for idx, pre_id in enumerate(pre_ids[:1]):
    from_this_neuron = df_filtered.pre_id_mapped == pre_id
    onto_first_class = df_filtered.post_celltype_merged == VIS.EXC_INH[0]
    df_rows = df_filtered[from_this_neuron & onto_first_class]

    print(df_rows.shape)

In [None]:
# Show each long label split into its lines (cell type label, presynaptic id).
for label in labels_long_inh:
    label_lines = label.split("\n")
    print(label_lines)

In [None]:
# Number of neurons whose binary label is non-zero (classes > 1 were mapped to 1).
np.count_nonzero(y_binary)

### 2D Embedding

In [None]:
%matplotlib inline

# All entries of VIS.CELLTYPES_ALL except the last one.
celltypes = VIS.CELLTYPES_ALL[:-1]

# Matplotlib "tab" palette names used for the scatter plot.
colors = ["tab:red",
          "tab:blue", 
          "tab:orange", 
          "tab:green", 
          "tab:purple",
          "tab:brown", 
          "tab:pink", 
          "tab:olive"]

# Cell type -> position in `colors` (types 2 and 20 share index 1).
# NOTE(review): no cell visible in this section reads this mapping; the scatter plot
# indexes `colors` by position in `celltypes` instead -- confirm which one is intended.
color_idx = {
    1  : 0,
    2  : 1,
    20  : 1,
    21  : 2,
    22  : 3,
    23  : 4,
    24  : 5,
    25  : 6
}

In [None]:
from sklearn.decomposition import PCA

# Project the scaled feature vectors onto their first two principal components.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

In [None]:
# Shape of the embedded points belonging to class label 1.
X_pca[y == 1].shape

In [None]:
initPlotSettings(spines_top_right=True)

fig, ax = plt.subplots(figsize=figsize_mm_to_inch(50,40))

# One scatter series per cell type, coloured by the type's position in `celltypes`.
# NOTE(review): this assumes `celltypes` has no more entries than `colors` -- confirm.
for ct_idx, ct in enumerate(celltypes):
    # Class 1 is drawn as triangles, every other class as slightly larger circles.
    marker, ms = ("^", 10) if ct == 1 else ("o", 12)
    is_ct = y == ct
    ax.scatter(
        X_pca[is_ct, 0],
        X_pca[is_ct, 1],
        color=colors[ct_idx],
        label=VIS.CELLTYPE_LABELS_SHORT[ct],
        marker=marker,
        s=ms,
        alpha=0.8,
        linewidths=0,
    )
ax.set_xlabel("PCA 1")
ax.set_ylabel("PCA 2")
ax.legend()
# The axes carry no ticks; only the relative position of the points is shown.
ax.set_xticks([])
ax.set_yticks([])
plt.show()

img = savefig_png_svg(fig, plot_folder/"pca_embedding")
display(img)

### Clustering

In [None]:
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from scipy.cluster.hierarchy import dendrogram, linkage

# Not used in this cell; kept in case a later cell refers to it.
selected = [y > 1]

# Restrict the scaled features, class labels and display labels to the classes > 1.
mask_inh = y > 1
X_inh = X_scaled[mask_inh]
y_inh = y[mask_inh]
y_labels = [label for label, keep in zip(labels_long, mask_inh) if keep]

# Hierarchical (agglomerative) clustering with Ward linkage; optimal_ordering reorders
# the leaves of the linkage matrix used for the dendrogram.
Z = linkage(X_inh, method='ward', optimal_ordering=True)

# Draw on an explicit Axes and apply every layout tweak BEFORE plt.show(): with the inline
# backend plt.show() closes the figure, so pyplot calls issued afterwards (as in the previous
# version) act on a new, empty figure and never reach the saved dendrogram.
fig, ax = plt.subplots(figsize=figsize_mm_to_inch(170,50))
dendrogram(Z, labels=y_labels, color_threshold=8, ax=ax)
ax.set_yticks([])
fig.subplots_adjust(top=0.99, left=0.01, right=0.99, bottom=0.22)
plt.show()

img = savefig_png_svg(fig, plot_folder/"dendrogram")
display(img)