In [3]:
import pandas as pd
import numpy as np
import os, sys
import re,glob
from matplotlib import pyplot as plt
import seaborn as sns
from scipy.stats import linregress
from tqdm import tqdm, trange

In [4]:
import matplotlib as mpl

# Font type 42 makes the PDF and PostScript backends write TrueType fonts
# (matplotlib's default is Type 3) in the figures saved further down.
mpl.rcParams.update({'pdf.fonttype': 42, 'ps.fonttype': 42})
# Use Arial as the font family for every figure.
plt.rc("font",family="Arial")

In [5]:
# Load the subtype-22 terminal table; the first CSV column is used as the index.
# Later cells read its `region`, `filename`, `soma_*` and `ter_*` columns.
df = pd.read_csv("terminal_info_subtype22.csv",index_col=0)

In [6]:
# Hex colour used for the scatter points of each genotype label.
color_dict = dict(
    Adcyap1="#ff0000",  # red
    Tac2="#00ffff",     # cyan
    Nts="#000080",      # navy
    Pdyn="#93278f",     # purple
)

In [7]:
# Extended Data Fig11a
# For every region group, average the soma and terminal coordinates per
# filename and draw a 3x3 grid (rows: soma axis, columns: terminal axis) of
# scatter plots, each with a linregress fit line and its r / p annotated.
# `region`, `dfplot` and `dfplot2` stay in the notebook namespace on purpose:
# the cells below display and reuse them.
soma_axes = ["soma_AP", "soma_DV", "soma_ML"]
ter_axes = ["ter_AP", "ter_DV", "ter_ML"]
for region in [["AMd", "AMv", "AD", "AV", "IAM", "IAD", "LD"]]:
    # Only the first region name is used to label the title and output files.
    group_tag = region[0]

    dfplot = df.loc[df.region.isin(region)]
    dfplot2 = dfplot.pivot_table(index="filename", values=soma_axes + ter_axes, aggfunc="mean")
    # The third "_"-separated field of each filename is the key into color_dict.
    dfplot2["color"] = [color_dict[name.split("_")[2]] for name in dfplot2.index.to_list()]

    fig, axs = plt.subplots(3, 3, figsize=(15, 12))
    for soma_label, ax_row in zip(soma_axes, axs):
        for ter_label, ax in zip(ter_axes, ax_row):
            soma_vals = dfplot2[soma_label]
            ter_vals = dfplot2[ter_label]
            fit = linregress(soma_vals, ter_vals)

            ax.scatter(x=soma_vals, y=ter_vals, s=10, c=dfplot2["color"])

            # Dashed fit line spanning the smallest to the largest soma value.
            line_x = [min(soma_vals), max(soma_vals)]
            line_y = [value * fit.slope + fit.intercept for value in line_x]
            ax.plot(line_x, line_y, c='black', linewidth=1.5, dashes=[4, 2])

            # Correlation coefficient and p-value in the upper-left corner
            # (position given in axes-fraction coordinates).
            annotation = '\n'.join(['r={:.3f}'.format(fit.rvalue),
                                    'p={:.3e}'.format(fit.pvalue)])
            ax.text(x=0.02, y=0.9, s=annotation, transform=ax.transAxes,
                    fontsize=12, verticalalignment='top')

            # Any axis that carries an ML coordinate is drawn reversed.
            if "ML" in soma_label:
                ax.invert_xaxis()
            if "ML" in ter_label:
                ax.invert_yaxis()

            ax.set_title(soma_label + "-" + ter_label)
            ax.set_xlabel(soma_label)
            ax.set_ylabel(ter_label)

    fig.suptitle('Subtype22_%s_all' % group_tag, fontsize=16, fontweight='bold')
    fig.subplots_adjust(wspace=0.25, hspace=0.35)
    fig.savefig("Subtype22_%s_all.jpg" % group_tag, dpi=600)
    fig.savefig("Subtype22_%s_all.pdf" % group_tag, dpi=600)
    # Keep the plotted data (raw rows and per-filename means) next to the figure.
    dfplot.to_csv("subtype22_all_plot1_%s.csv" % group_tag)
    dfplot2.to_csv("subtype22_all_plot2_%s.csv" % group_tag)
    plt.close(fig)
                

In [8]:
# Show the rows of `df` kept by the region filter in the Fig11a cell above.
dfplot

Unnamed: 0,index,id,type,ter_AP,ter_DV,ter_ML,radius,parent,region,neuron,...,somas_hemisphere,terminal_hemisphere,is_ipsi,filename,cluster,somas,geno,ter_pca,soma_pca,region_new
13,13,6609,0,6013.87,3495.87,4512.80,1.019531,6608,AV,210996_013,...,True,True,True,210996_013_Adcyap1_SUM,22,MBO,Adcyap1,-321.134299,250.998826,AV
14,14,6777,0,6071.82,3430.36,4404.12,0.699219,6776,AV,210996_013,...,True,True,True,210996_013_Adcyap1_SUM,22,MBO,Adcyap1,-242.957310,250.998826,AV
15,15,6928,0,5924.80,3577.00,4530.89,0.351562,6927,AV,210996_013,...,True,True,True,210996_013_Adcyap1_SUM,22,MBO,Adcyap1,-437.577243,250.998826,AV
21,21,7608,0,5899.75,3621.27,4532.84,0.847656,7607,AV,210996_013,...,True,True,True,210996_013_Adcyap1_SUM,22,MBO,Adcyap1,-480.836076,250.998826,AV
22,22,7641,0,5832.85,3759.36,4563.14,0.699219,7640,AV,210996_013,...,True,True,True,210996_013_Adcyap1_SUM,22,MBO,Adcyap1,-605.013711,250.998826,AV
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77,77,6217,0,6047.34,4560.61,5242.52,0.351562,6216,AMd,211461_015,...,True,True,True,211461_015_Nts_MMme,22,MBO,Nts,-779.309900,157.493054,AM
78,78,6221,0,6051.76,4554.61,5243.00,0.234375,6220,AMd,211461_015,...,True,True,True,211461_015_Nts_MMme,22,MBO,Nts,-772.522337,157.493054,AM
79,79,6234,0,6056.17,4559.44,5267.47,0.523438,6233,AMd,211461_015,...,True,True,True,211461_015_Nts_MMme,22,MBO,Nts,-770.061169,157.493054,AM
2,2,2795,0,6249.45,4286.30,5385.42,0.437500,2794,IAD,211269_106,...,True,True,True,211269_106_Tac2_LM,22,MBO,Tac2,-464.289069,218.103455,IAD


In [10]:
# Show the per-filename mean coordinates for rows whose filename contains "Tac2".
dfplot2.loc[dfplot2.index.str.contains("Tac2")]

Unnamed: 0_level_0,soma_AP,soma_DV,soma_ML,ter_AP,ter_DV,ter_ML,color
filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
202562_021_Tac2_MMl,8125.15,6333.6,5129.51,5796.440667,4263.378333,4676.623,#00ffff
202562_023_Tac2_MMl,8203.38,6277.94,5070.9,5733.93,4100.894444,4721.251111,#00ffff
202562_024_Tac2_MMl,8253.64,6196.74,5112.55,5751.672941,4085.776471,4638.096471,#00ffff
202562_025_Tac2_MMl,8287.26,6268.59,5208.53,5771.30625,4258.4475,4725.56625,#00ffff
202562_031_Tac2_MMl,8058.9,6030.47,5259.96,6029.693333,4619.056667,4707.106667,#00ffff
202562_032_Tac2_MMl,8023.66,6352.27,5012.91,5734.524667,4173.196667,4676.086,#00ffff
202562_034_Tac2_LM,7929.2,6297.54,4784.67,5997.627284,3726.619259,4893.002593,#00ffff
202562_035_Tac2_LM,8001.62,6309.32,4845.06,5942.450294,4213.488529,5040.017353,#00ffff
202562_036_Tac2_LM,8109.34,6218.92,4848.96,5976.108919,3858.050721,4950.377477,#00ffff
202562_037_Tac2_LM,8064.38,6168.69,4796.51,5933.957667,3915.353333,4940.692333,#00ffff


In [11]:
# Extended Data Fig11c
# Draws the 3x3 soma-axis vs terminal-axis scatter grid separately for each
# genotype, with a linregress fit line and its r / p annotated in every panel.
# Relies on two names left in the namespace by the Fig11a cell: `dfplot2`
# (per-filename mean coordinates plus a "color" column) and `region` (its
# first entry labels the title and the output files).
for geno in ["Tac2","Nts"]:
    # Match the genotype exactly on the third "_"-separated filename field --
    # the same field the Fig11a cell uses to pick the colour -- instead of a
    # regex substring search over the whole filename, which could also hit an
    # ID or soma-region suffix that happens to contain the genotype string.
    dfplot_tmp = dfplot2.loc[dfplot2.index.str.split("_").str[2] == geno]
    fig, axs = plt.subplots(3, 3, figsize=(15, 12))
    for i,soma_label in enumerate(["soma_AP","soma_DV","soma_ML"]):
        for j,ter_label in enumerate(["ter_AP","ter_DV","ter_ML"]):
            ax = axs[i, j]
            x = dfplot_tmp[soma_label]
            y = dfplot_tmp[ter_label]

            regression_result = linregress(x, y)
            ax.scatter(x=x, y=y, s=10, c=dfplot_tmp["color"])
            # Dashed fit line spanning the smallest to the largest soma value.
            x_min, x_max = x.min(), x.max()
            ax.plot([x_min, x_max],
                    [x_min * regression_result.slope + regression_result.intercept,
                     x_max * regression_result.slope + regression_result.intercept],
                    c='black',
                    linewidth=1.5,
                    dashes=[4, 2]
                    )
            # Correlation coefficient and p-value in the upper-left corner
            # (position given in axes-fraction coordinates).
            textstr = '\n'.join(['r={:.3f}'.format(regression_result.rvalue),
                                 'p={:.3e}'.format(regression_result.pvalue)])
            ax.text(x=0.02,
                    y=0.9,
                    s=textstr,
                    transform=ax.transAxes,
                    fontsize=12,
                    verticalalignment='top'
                    )

            # Any axis that carries an ML coordinate is drawn reversed.
            if "ML" in soma_label:
                ax.invert_xaxis()

            if "ML" in ter_label:
                ax.invert_yaxis()

            ax.set_title(soma_label+"-"+ter_label)
            ax.set_xlabel(soma_label)
            ax.set_ylabel(ter_label)
    # Act on this figure explicitly rather than on pyplot's "current figure".
    fig.suptitle('Subtype22_%s_%s'%(region[0],geno), fontsize=16, fontweight='bold')
    fig.subplots_adjust(wspace=0.25, hspace=0.35)
    fig.savefig("Subtype22_%s_%s.jpg"%(region[0],geno),dpi=600)
    fig.savefig("Subtype22_%s_%s.pdf"%(region[0],geno),dpi=600)
    plt.close(fig)
        