In [None]:
import cudf
import pandas as pd
import numpy as np
from pathlib import Path

In [None]:
# --- Paths -----------------------------------------------------------------
# Everything lives below <cwd>/data/VIS; exported files for plotting go into
# the "visualization" sub-folder, which is created on demand.
data_folder = Path.cwd().joinpath("data", "VIS")
plot_folder = data_folder.joinpath("visualization")
plot_folder.mkdir(parents=True, exist_ok=True)

# Input tables.
synapses_file = data_folder.joinpath("synapses_flat.csv")
soma_file = data_folder.joinpath("meta", "soma.csv")

# --- Grid settings ---------------------------------------------------------
# The grid size is passed to Gridder.computeGrid; the descriptor is embedded
# in every output file name.
grid_size = 200_000
grid_size_descriptor = f"grid-{grid_size}"

# --- Neuron selection ------------------------------------------------------
selected_pre_ids = [
    2, 5, 25, 26, 30, 37, 38, 54, 82,
    173, 210, 282, 298, 327, 337, 338, 353, 421,
]

In [None]:
# Explicit column dtypes for the flat synapse table, grouped by width:
# coordinates are uint32, neuron ids uint64, categorical codes int8.
synapse_column_dtypes = {
    **dict.fromkeys(["x", "y", "z"], "uint32"),
    **dict.fromkeys(["pre_id", "post_id"], "uint64"),
    **dict.fromkeys(["pre_celltype", "post_celltype", "post_compartment"], "int8"),
}

# Load the synapse table with cudf.
synapses_df = cudf.read_csv(synapses_file, dtype=synapse_column_dtypes)

# (rows, columns) of the loaded table.
synapses_df.shape

In [None]:
# Read only the two id columns of the soma table.
soma_column_dtypes = {"neuron_id": "uint64", "neuron_id_mapped": "int16"}
soma_df = pd.read_csv(
    soma_file,
    usecols=list(soma_column_dtypes),
    dtype=soma_column_dtypes,
)

# Lookup table: raw neuron id -> mapped id shifted down by 1000.
mapping_dict = {}
for raw_id, mapped_id in zip(soma_df["neuron_id"], soma_df["neuron_id_mapped"]):
    mapping_dict[np.uint64(raw_id)] = np.int16(mapped_id - 1000)

In [None]:
# Display the soma table (columns neuron_id and neuron_id_mapped) for a quick visual check.
soma_df

In [None]:
# Number of synapses for every (pre_celltype, post_celltype) combination.
celltype_pair_keys = ["pre_celltype", "post_celltype"]
synapses_df.groupby(celltype_pair_keys).size()

In [None]:
# Add placeholder columns that later cells fill in:
#   *_id_mapped    -> -1 marks "no mapped neuron"
#   overlap_volume -> 0 until the grid indices are computed
n_synapses = len(synapses_df)
unmapped_ids = np.full(n_synapses, -1, dtype="int16")

synapses_df["pre_id_mapped"] = cudf.Series(unmapped_ids, dtype="int16")
synapses_df["post_id_mapped"] = cudf.Series(unmapped_ids, dtype="int16")
synapses_df["overlap_volume"] = cudf.Series(np.zeros(n_synapses, dtype="uint64"), dtype="uint64")

In [None]:
# Assign every synapse a grid index and store it in the "overlap_volume" column.
# Gridder is a project-local helper (lib.gridder); its internals are not visible here.
from lib.gridder import Gridder

# NOTE(review): the "grid" sub-folder is presumably Gridder's working/output directory — confirm.
gridder = Gridder(data_folder/"grid")

# Synapse coordinates as a NumPy array with one row per synapse (columns x, y, z).
xyz = synapses_df[["x", "y", "z"]].to_numpy()

# Positions must be registered before the grid is computed.
gridder.setPositions(xyz)
# NOTE(review): `indices` is assumed to hold one grid-cell index per input position,
# in input order — verify against Gridder.computeGrid.
indices, df_grid_meta = gridder.computeGrid(grid_size)
# Replaces any existing overlap_volume column with the grid indices cast to uint64.
synapses_df["overlap_volume"] = indices.astype("uint64")

In [None]:
# Persist the grid metadata returned by Gridder.computeGrid next to the input data.
grid_meta_csv = data_folder / f"{grid_size_descriptor}_meta.csv"
df_grid_meta.to_csv(grid_meta_csv, index=False)

In [None]:
# Export a 10 % random subsample of all synapses for plotting.
# A fixed seed keeps the exported file identical across "Restart & Run All";
# without it every run would write a different sample.
synapses_df.sample(frac=0.10, random_state=42).to_csv(
    plot_folder/f"synapses_{grid_size_descriptor}_10pct.csv", index=False
)

In [None]:
# Translate raw pre/post neuron ids into their mapped ids in one vectorised
# lookup per column. The previous per-neuron loop did a full-column comparison
# and a masked write for every entry of mapping_dict, twice.
#
# The lookup is built with an explicit uint64 index so the join inside
# Series.map compares ids exactly (no signed/unsigned or float promotion).
n_mapped_neurons = len(mapping_dict)
id_lookup = cudf.Series(
    np.fromiter(mapping_dict.values(), dtype="int16", count=n_mapped_neurons),
    index=cudf.Index(np.fromiter(mapping_dict.keys(), dtype="uint64", count=n_mapped_neurons)),
)

for raw_column, mapped_column in (("pre_id", "pre_id_mapped"), ("post_id", "post_id_mapped")):
    # Ids missing from the lookup come back as null; -1 marks them as unmapped,
    # matching the placeholder value used for these columns.
    synapses_df[mapped_column] = (
        synapses_df[raw_column].map(id_lookup).fillna(-1).astype("int16")
    )

In [None]:
# Synapses where at least one partner (pre OR post) has a mapped id (> -1).
pre_is_mapped = synapses_df["pre_id_mapped"] > -1
post_is_mapped = synapses_df["post_id_mapped"] > -1
synapses_mapped_neurons = synapses_df[pre_is_mapped | post_is_mapped]

mapped_neurons_csv = plot_folder / f"synapses_{grid_size_descriptor}_mapped-neurons.csv"
synapses_mapped_neurons.to_csv(mapped_neurons_csv, index=False)

synapses_mapped_neurons

In [None]:
# Synapses where both partners (pre AND post) have a mapped id (> -1).
pre_is_mapped = synapses_df["pre_id_mapped"] > -1
post_is_mapped = synapses_df["post_id_mapped"] > -1
synapses_btw_mapped_neurons = synapses_df[pre_is_mapped & post_is_mapped]

between_mapped_csv = plot_folder / f"synapses_{grid_size_descriptor}_between-mapped-neurons.csv"
synapses_btw_mapped_neurons.to_csv(between_mapped_csv, index=False)

synapses_btw_mapped_neurons

In [None]:
# Distinct grid cells (overlap volumes) that contain at least one synapse of a mapped neuron.
selected_overlap_volumes = synapses_mapped_neurons["overlap_volume"].unique()

# How many such grid cells there are.
selected_overlap_volumes.size

In [None]:
# Toggle: when True, keep only synapses that lie in a grid cell (overlap volume)
# touched by a mapped neuron; when False, keep every synapse.
range_filter_enabled = False

if range_filter_enabled:

    # Padded x/y bounding box around the mapped-neuron synapses.
    # These bounds are kept for reference only: the coordinate-based filter is
    # disabled and the selection below is done purely by overlap volume.
    offset = 5000

    x_min = synapses_mapped_neurons.x.min() - offset
    y_min = synapses_mapped_neurons.y.min() - offset
    #z_min = synapses_mapped_neurons.z.min() - offset

    x_max = synapses_mapped_neurons.x.max() + offset
    y_max = synapses_mapped_neurons.y.max() + offset
    #z_max = synapses_mapped_neurons.z.max() + offset

    in_selected_volume = synapses_df.overlap_volume.isin(selected_overlap_volumes)

    # reset_index returns a new frame; the result must be assigned, otherwise
    # the filtered frame keeps the gaps in its original index.
    df_synapses_range = synapses_df[in_selected_volume].reset_index(drop=True)

else:

    # Independent copy so later changes to the range frame never touch synapses_df.
    df_synapses_range = synapses_df.copy()

In [None]:
# Full export of the (optionally range-filtered) synapse table.
df_synapses_range.to_pandas().to_csv(data_folder/f"synapses_{grid_size_descriptor}_mapped-neurons-range.csv", index=False)

# 10 % subsample for plotting. A fixed seed makes the exported sample
# reproducible; without it every run would write a different file.
df_synapses_range.sample(frac=0.10, random_state=42).to_csv(
    plot_folder/f"synapses_{grid_size_descriptor}_mapped-neurons-range_10pct.csv", index=False
)

In [None]:
# Display the synapse frame that the export and aggregation cells operate on.
df_synapses_range

In [None]:
# Collapse individual synapses into counts per
# (grid cell, mapped pre id, mapped post id, cell types, post compartment).
aggregation_keys = [
    "overlap_volume",
    "pre_id_mapped",
    "post_id_mapped",
    "pre_celltype",
    "post_celltype",
    "post_compartment",
]
synapse_counts = df_synapses_range.groupby(aggregation_keys).size()
df_synapses_aggregated = synapse_counts.reset_index(name="synapse_count")

aggregated_csv = data_folder / f"synapses_{grid_size_descriptor}_aggregated.csv"
df_synapses_aggregated.to_pandas().to_csv(aggregated_csv, index=False)

# Total number of synapses represented by the aggregated rows.
print(df_synapses_aggregated.synapse_count.sum())
df_synapses_aggregated