In [1]:
import scanpy as sc
import numpy as np
import pandas as pd
import os
from SDMBench import *
import palettable
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
pd_df = pd.read_feather('performance_summary.feather')

In [3]:
pd_df

Unnamed: 0,index,DataName,DataID,Biotech,BiotechType,Replicate,Method,NMI,HOM,COM,CHAOS,PAS,ASW,MoranI,GearyC,time,memory
0,0,mouse_VISp,Dataset21,SeqFISH,Imaging,0,BASS,0.352058,0.343537,0.361012,0.040216,0.016249,0.133532,0.344711,0.654205,243.47508,1200.5
1,1,mouse_VISp,Dataset21,SeqFISH,Imaging,1,BASS,0.352058,0.343537,0.361012,0.040216,0.016249,0.133532,0.344711,0.654205,238.90554,1200.5
2,2,mouse_VISp,Dataset21,SeqFISH,Imaging,2,BASS,0.352058,0.343537,0.361012,0.040216,0.016249,0.133532,0.344711,0.654205,242.60538,1200.5
3,3,mouse_VISp,Dataset21,SeqFISH,Imaging,3,BASS,0.352058,0.343537,0.361012,0.040216,0.016249,0.133532,0.344711,0.654205,238.09740,1200.6
4,4,mouse_VISp,Dataset21,SeqFISH,Imaging,4,BASS,0.352058,0.343537,0.361012,0.040216,0.016249,0.133532,0.344711,0.654205,241.77096,1200.6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3747,2308,Slice_2,Dataset19,BaristaSeq,Sequencing,5,BASS,0.816240,0.822387,0.810183,0.048557,0.029383,0.150663,0.086518,0.899413,208.58748,725.1
3748,2309,Slice_2,Dataset19,BaristaSeq,Sequencing,6,BASS,0.816240,0.822387,0.810183,0.048557,0.029383,0.150663,0.086518,0.899413,208.05342,725.1
3749,2310,Slice_2,Dataset19,BaristaSeq,Sequencing,7,BASS,0.816240,0.822387,0.810183,0.048557,0.029383,0.150663,0.086518,0.899413,206.02722,725.1
3750,2311,Slice_2,Dataset19,BaristaSeq,Sequencing,8,BASS,0.816240,0.822387,0.810183,0.048557,0.029383,0.150663,0.086518,0.899413,207.49020,725.1


In [4]:
pd_df['ASW_scale'] = (pd_df['ASW']+1)/2

In [5]:
np.unique(pd_df['Method']).shape

(14,)

In [6]:
import palettable

method_order = [
    'louvain','leiden',
    'SpaGCN_without','SpaGCN_with','BayesSpace','stLearn','SEDR',
    'CCST','SCAN-IT','STAGATE','SpaceFlow','conST_nopre','BASS','DeepST'
]

cmp = palettable.tableau.Tableau_20.mpl_colors
method_color_dict = dict(zip(method_order,cmp))


In [7]:
metrics_list = [
    'NMI','HOM','COM',
    'CHAOS','PAS','ASW_scale','ASW',
    'MoranI','GearyC',
    'time','memory'
]

In [8]:
cmp = palettable.tableau.Tableau_20.mpl_colors
method_color_dict = dict(zip(method_order,cmp))


In [10]:
cmp = palettable.tableau.Tableau_20.mpl_colors
method_color_dict = dict(zip(method_order,cmp))
def make_cmp(method_color_dict,method_list):
    idx = []
    method_key = list(method_color_dict.keys())
    color_val = []
    for i in range(len(method_color_dict)):
        cur_method = method_key[i]
        cur_color = method_color_dict[cur_method]
        color_val.append(cur_color)
        if cur_method in method_list:
            idx.append(i)
    return [method_key[i] for i in idx],[color_val[i] for i in idx]
    

In [58]:
for data in np.unique(pd_df['DataID']):
    os.makedirs(f'figures/default/{data}',exist_ok=True)
    pd_df_data = pd_df[pd_df['DataID']==data]
    for metrics_use in metrics_list:
        cur_method_order,cur_cmp = make_cmp(method_color_dict,np.unique(pd_df_data['Method']))
        fig,ax = plt.subplots(1,1,figsize=(2,5))

        ax = sns.barplot(pd_df_data,x=metrics_use,y='Method',
                         # order=['spagcn','stagate','spaceflow','MS'],
                         order = cur_method_order,
                    # hue='Method',
                        ci="sd", 
                        edgecolor="black",
            errcolor="black",
            errwidth=1.5,
            capsize = 0.1,
            # alpha=0.5,

                    palette=cur_cmp
                   )

        sns.stripplot(
            pd_df_data,
            y="Method", 
            x=metrics_use, 
                         # order=['spagcn','stagate','spaceflow','MS'],
            order = cur_method_order,

            # hue="method", 
             dodge=True, 
            # alpha=0.6, 
            ax=ax,
            color='k',
            size=2
                    # palette=cmp



        )
        handles, labels = ax.get_legend_handles_labels()
        ax.legend(handles[2:], labels[2:], title='Method', bbox_to_anchor=(1, 1.02), loc='upper left')



        plt.savefig(f'figures/default/{data}/{metrics_use}.pdf',dpi=400,bbox_inches='tight',transparent=True)
        plt.close()


The `ci` parameter is deprecated. Use `errorbar='sd'` for the same effect.

  ax = sns.barplot(pd_df_data,x=metrics_use,y='Method',

The `ci` parameter is deprecated. Use `errorbar='sd'` for the same effect.

  ax = sns.barplot(pd_df_data,x=metrics_use,y='Method',

The `ci` parameter is deprecated. Use `errorbar='sd'` for the same effect.

  ax = sns.barplot(pd_df_data,x=metrics_use,y='Method',

The `ci` parameter is deprecated. Use `errorbar='sd'` for the same effect.

  ax = sns.barplot(pd_df_data,x=metrics_use,y='Method',

The `ci` parameter is deprecated. Use `errorbar='sd'` for the same effect.

  ax = sns.barplot(pd_df_data,x=metrics_use,y='Method',

The `ci` parameter is deprecated. Use `errorbar='sd'` for the same effect.

  ax = sns.barplot(pd_df_data,x=metrics_use,y='Method',

The `ci` parameter is deprecated. Use `errorbar='sd'` for the same effect.

  ax = sns.barplot(pd_df_data,x=metrics_use,y='Method',

The `ci` parameter is deprecated. Use `errorbar='sd'` for the same e

In [11]:
pd_df

Unnamed: 0,index,DataName,DataID,Biotech,BiotechType,Replicate,Method,NMI,HOM,COM,CHAOS,PAS,ASW,MoranI,GearyC,time,memory,ASW_scale
0,0,mouse_VISp,Dataset21,SeqFISH,Imaging,0,BASS,0.352058,0.343537,0.361012,0.040216,0.016249,0.133532,0.344711,0.654205,243.47508,1200.5,0.566766
1,1,mouse_VISp,Dataset21,SeqFISH,Imaging,1,BASS,0.352058,0.343537,0.361012,0.040216,0.016249,0.133532,0.344711,0.654205,238.90554,1200.5,0.566766
2,2,mouse_VISp,Dataset21,SeqFISH,Imaging,2,BASS,0.352058,0.343537,0.361012,0.040216,0.016249,0.133532,0.344711,0.654205,242.60538,1200.5,0.566766
3,3,mouse_VISp,Dataset21,SeqFISH,Imaging,3,BASS,0.352058,0.343537,0.361012,0.040216,0.016249,0.133532,0.344711,0.654205,238.09740,1200.6,0.566766
4,4,mouse_VISp,Dataset21,SeqFISH,Imaging,4,BASS,0.352058,0.343537,0.361012,0.040216,0.016249,0.133532,0.344711,0.654205,241.77096,1200.6,0.566766
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3747,2308,Slice_2,Dataset19,BaristaSeq,Sequencing,5,BASS,0.816240,0.822387,0.810183,0.048557,0.029383,0.150663,0.086518,0.899413,208.58748,725.1,0.575331
3748,2309,Slice_2,Dataset19,BaristaSeq,Sequencing,6,BASS,0.816240,0.822387,0.810183,0.048557,0.029383,0.150663,0.086518,0.899413,208.05342,725.1,0.575331
3749,2310,Slice_2,Dataset19,BaristaSeq,Sequencing,7,BASS,0.816240,0.822387,0.810183,0.048557,0.029383,0.150663,0.086518,0.899413,206.02722,725.1,0.575331
3750,2311,Slice_2,Dataset19,BaristaSeq,Sequencing,8,BASS,0.816240,0.822387,0.810183,0.048557,0.029383,0.150663,0.086518,0.899413,207.49020,725.1,0.575331


Unnamed: 0,index,DataName,DataID,Biotech,BiotechType,Replicate,Method,NMI,HOM,COM,CHAOS,PAS,ASW,MoranI,GearyC,time,memory,ASW_scale
340,340,osmfish,Dataset27,osmFISH,Imaging,0,SpaGCN_without,0.248057,0.253438,0.242900,0.036838,0.663154,-0.173178,0.182687,0.813544,129.400319,1034.966228,0.413411
341,341,osmfish,Dataset27,osmFISH,Imaging,1,SpaGCN_without,0.248057,0.253438,0.242900,0.036838,0.663154,-0.173178,0.182687,0.813544,120.172936,985.515541,0.413411
342,342,osmfish,Dataset27,osmFISH,Imaging,2,SpaGCN_without,0.248057,0.253438,0.242900,0.036838,0.663154,-0.173178,0.182687,0.813544,118.135454,985.412717,0.413411
343,343,osmfish,Dataset27,osmFISH,Imaging,3,SpaGCN_without,0.248057,0.253438,0.242900,0.036838,0.663154,-0.173178,0.182687,0.813544,119.008954,985.412114,0.413411
344,344,osmfish,Dataset27,osmFISH,Imaging,4,SpaGCN_without,0.248057,0.253438,0.242900,0.036838,0.663154,-0.173178,0.182687,0.813544,121.220424,985.411199,0.413411
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
445,445,osmfish,Dataset27,osmFISH,Imaging,5,STAGATE,0.507953,0.531596,0.486325,0.024295,0.167597,-0.111794,0.197198,0.796880,17.207911,272.875998,0.444103
446,446,osmfish,Dataset27,osmFISH,Imaging,6,STAGATE,0.531718,0.558447,0.507431,0.024552,0.164290,-0.096107,0.197198,0.796880,16.806531,272.874997,0.451946
447,447,osmfish,Dataset27,osmFISH,Imaging,7,STAGATE,0.525651,0.557385,0.497335,0.024907,0.169663,-0.087471,0.205025,0.790255,17.109708,272.878586,0.456264
448,448,osmfish,Dataset27,osmFISH,Imaging,8,STAGATE,0.526610,0.553862,0.501913,0.023948,0.162430,-0.109842,0.205025,0.790255,17.072913,272.875978,0.445079


In [12]:
for data in np.unique(pd_df['Biotech']):
    os.makedirs(f'figures/biotech_score/{data}',exist_ok=True)
    pd_df_data = pd_df[pd_df['Biotech']==data]
    for metrics_use in metrics_list:
        cur_method_order,cur_cmp = make_cmp(method_color_dict,np.unique(pd_df_data['Method']))
        fig,ax = plt.subplots(1,1,figsize=(2,5))

        ax = sns.barplot(pd_df_data,x=metrics_use,y='Method',
                         # order=['spagcn','stagate','spaceflow','MS'],
                         order = cur_method_order,
                    # hue='Method',
                        ci="sd", 
                        edgecolor="black",
            errcolor="black",
            errwidth=1.5,
            capsize = 0.1,
            # alpha=0.5,

                    palette=cur_cmp
                   )

        sns.stripplot(
            pd_df_data,
            y="Method", 
            x=metrics_use, 
                         # order=['spagcn','stagate','spaceflow','MS'],
            order = cur_method_order,

            # hue="method", 
             dodge=True, 
            # alpha=0.6, 
            ax=ax,
            color='k',
            size=2
                    # palette=cmp



        )
        handles, labels = ax.get_legend_handles_labels()
        ax.legend(handles[2:], labels[2:], title='Method', bbox_to_anchor=(1, 1.02), loc='upper left')



        plt.savefig(f'figures/biotech_score/{data}/{metrics_use}.pdf',dpi=400,bbox_inches='tight',transparent=True)
        plt.close()


The `ci` parameter is deprecated. Use `errorbar='sd'` for the same effect.

  ax = sns.barplot(pd_df_data,x=metrics_use,y='Method',

The `ci` parameter is deprecated. Use `errorbar='sd'` for the same effect.

  ax = sns.barplot(pd_df_data,x=metrics_use,y='Method',

The `ci` parameter is deprecated. Use `errorbar='sd'` for the same effect.

  ax = sns.barplot(pd_df_data,x=metrics_use,y='Method',

The `ci` parameter is deprecated. Use `errorbar='sd'` for the same effect.

  ax = sns.barplot(pd_df_data,x=metrics_use,y='Method',

The `ci` parameter is deprecated. Use `errorbar='sd'` for the same effect.

  ax = sns.barplot(pd_df_data,x=metrics_use,y='Method',

The `ci` parameter is deprecated. Use `errorbar='sd'` for the same effect.

  ax = sns.barplot(pd_df_data,x=metrics_use,y='Method',

The `ci` parameter is deprecated. Use `errorbar='sd'` for the same effect.

  ax = sns.barplot(pd_df_data,x=metrics_use,y='Method',

The `ci` parameter is deprecated. Use `errorbar='sd'` for the same e

## order by value

In [14]:
metrics_order_principle = {
    'NMI':'asc',
    'HOM':'asc',
    'COM':'asc',
    'CHAOS':'des',
    'PAS':'des',
    'ASW':'asc',
    'ASW_scale':'asc',
    
    'MoranI':'asc',
    'GearyC':'des',
    'time':'des',
    'memory':'des'
}


In [22]:
pd_df_data.groupby('Method').agg('median').sort_values('NMI').index[::-1]



  pd_df_data.groupby('Method').agg('median').sort_values('NMI').index[::-1]


Index(['BASS', 'SpaceFlow', 'DeepST', 'STAGATE', 'SCAN-IT', 'CCST', 'louvain',
       'leiden', 'SpaGCN_without', 'SEDR', 'conST_nopre'],
      dtype='object', name='Method')

In [24]:
method_color_dict

{'louvain': (0.12156862745098039, 0.4666666666666667, 0.7058823529411765),
 'leiden': (0.6823529411764706, 0.7803921568627451, 0.9098039215686274),
 'SpaGCN_without': (1.0, 0.4980392156862745, 0.054901960784313725),
 'SpaGCN_with': (1.0, 0.7333333333333333, 0.47058823529411764),
 'BayesSpace': (0.17254901960784313, 0.6274509803921569, 0.17254901960784313),
 'stLearn': (0.596078431372549, 0.8745098039215686, 0.5411764705882353),
 'SEDR': (0.8392156862745098, 0.15294117647058825, 0.1568627450980392),
 'CCST': (1.0, 0.596078431372549, 0.5882352941176471),
 'SCAN-IT': (0.5803921568627451, 0.403921568627451, 0.7411764705882353),
 'STAGATE': (0.7725490196078432, 0.6901960784313725, 0.8352941176470589),
 'SpaceFlow': (0.5490196078431373, 0.33725490196078434, 0.29411764705882354),
 'conST_nopre': (0.7686274509803922, 0.611764705882353, 0.5803921568627451),
 'BASS': (0.8901960784313725, 0.4666666666666667, 0.7607843137254902),
 'DeepST': (0.9686274509803922, 0.7137254901960784, 0.82352941176470

In [26]:
{m:method_color_dict[m] for m in cur_m}

{'STAGATE': (0.7725490196078432, 0.6901960784313725, 0.8352941176470589),
 'conST_nopre': (0.7686274509803922, 0.611764705882353, 0.5803921568627451),
 'SEDR': (0.8392156862745098, 0.15294117647058825, 0.1568627450980392),
 'SCAN-IT': (0.5803921568627451, 0.403921568627451, 0.7411764705882353),
 'SpaceFlow': (0.5490196078431373, 0.33725490196078434, 0.29411764705882354),
 'leiden': (0.6823529411764706, 0.7803921568627451, 0.9098039215686274),
 'louvain': (0.12156862745098039, 0.4666666666666667, 0.7058823529411765),
 'SpaGCN_without': (1.0, 0.4980392156862745, 0.054901960784313725),
 'CCST': (1.0, 0.596078431372549, 0.5882352941176471),
 'BASS': (0.8901960784313725, 0.4666666666666667, 0.7607843137254902),
 'DeepST': (0.9686274509803922, 0.7137254901960784, 0.8235294117647058)}

In [27]:
for data in np.unique(pd_df['Biotech']):
    os.makedirs(f'figures/biotech_score_sort/{data}',exist_ok=True)
    pd_df_data = pd_df[pd_df['Biotech']==data]
    for metrics_use in metrics_list:
        if metrics_order_principle[metrics_use]=='asc':
            cur_m = pd_df_data.groupby('Method').agg('median').sort_values(metrics_use).index[::-1]
        else:
            cur_m = pd_df_data.groupby('Method').agg('median').sort_values(metrics_use).index
            

        cur_method_order,cur_cmp = make_cmp({m:method_color_dict[m] for m in cur_m},cur_m)
        fig,ax = plt.subplots(1,1,figsize=(2,5))

        ax = sns.barplot(pd_df_data,x=metrics_use,y='Method',
                         # order=['spagcn','stagate','spaceflow','MS'],
                         order = cur_method_order,
                    # hue='Method',
                        ci="sd", 
                        edgecolor="black",
            errcolor="black",
            errwidth=1.5,
            capsize = 0.1,
            # alpha=0.5,

                    palette=cur_cmp
                   )

        sns.stripplot(
            pd_df_data,
            y="Method", 
            x=metrics_use, 
                         # order=['spagcn','stagate','spaceflow','MS'],
            order = cur_method_order,

            # hue="method", 
             dodge=True, 
            # alpha=0.6, 
            ax=ax,
            color='k',
            size=2
                    # palette=cmp



        )
        handles, labels = ax.get_legend_handles_labels()
        ax.legend(handles[2:], labels[2:], title='Method', bbox_to_anchor=(1, 1.02), loc='upper left')



        plt.savefig(f'figures/biotech_score_sort/{data}/{metrics_use}.pdf',dpi=400,bbox_inches='tight',transparent=True)
        plt.close()

  cur_m = pd_df_data.groupby('Method').agg('median').sort_values(metrics_use).index[::-1]

The `ci` parameter is deprecated. Use `errorbar='sd'` for the same effect.

  ax = sns.barplot(pd_df_data,x=metrics_use,y='Method',
  cur_m = pd_df_data.groupby('Method').agg('median').sort_values(metrics_use).index[::-1]

The `ci` parameter is deprecated. Use `errorbar='sd'` for the same effect.

  ax = sns.barplot(pd_df_data,x=metrics_use,y='Method',
  cur_m = pd_df_data.groupby('Method').agg('median').sort_values(metrics_use).index[::-1]

The `ci` parameter is deprecated. Use `errorbar='sd'` for the same effect.

  ax = sns.barplot(pd_df_data,x=metrics_use,y='Method',
  cur_m = pd_df_data.groupby('Method').agg('median').sort_values(metrics_use).index

The `ci` parameter is deprecated. Use `errorbar='sd'` for the same effect.

  ax = sns.barplot(pd_df_data,x=metrics_use,y='Method',
  cur_m = pd_df_data.groupby('Method').agg('median').sort_values(metrics_use).index

The `ci` parameter is depreca

In [32]:
for data in np.unique(pd_df['Biotech']):
    os.makedirs(f'figures/biotech_score_sort/{data}',exist_ok=True)
    pd_df_data = pd_df[pd_df['Biotech']==data]
    for metrics_use in metrics_list:
        if metrics_order_principle[metrics_use]=='asc':
            cur_m = pd_df_data.groupby('Method').agg('median').sort_values(metrics_use).index[::-1]
        else:
            cur_m = pd_df_data.groupby('Method').agg('median').sort_values(metrics_use).index
            

        cur_method_order,cur_cmp = make_cmp({m:method_color_dict[m] for m in cur_m},cur_m)
        fig,ax = plt.subplots(1,1,figsize=(2,5))

        ax = sns.boxplot(pd_df_data,x=metrics_use,y='Method',
                         # order=['spagcn','stagate','spaceflow','MS'],
                         order = cur_method_order,
                    # hue='Method',
                        # ci="sd", 
                        # edgecolor="black",
            # errcolor="black",
            # errwidth=1.5,
            # capsize = 0.1,
            # alpha=0.5,

                    palette=cur_cmp
                   )

        sns.stripplot(
            pd_df_data,
            y="Method", 
            x=metrics_use, 
                         # order=['spagcn','stagate','spaceflow','MS'],
            order = cur_method_order,

            # hue="method", 
             dodge=True, 
            # alpha=0.6, 
            ax=ax,
            color='k',
            size=2
                    # palette=cmp



        )
        handles, labels = ax.get_legend_handles_labels()
        # ax.legend(handles[2:], labels[2:], title='Method', bbox_to_anchor=(1, 1.02), loc='upper left')
        ax.legend([])
        
        ax.legend(frameon=False)


        plt.savefig(f'figures/biotech_score_sort/{data}/{metrics_use}.pdf',dpi=400,bbox_inches='tight',transparent=True)
        plt.close()

  cur_m = pd_df_data.groupby('Method').agg('median').sort_values(metrics_use).index[::-1]
No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
  cur_m = pd_df_data.groupby('Method').agg('median').sort_values(metrics_use).index[::-1]
No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
  cur_m = pd_df_data.groupby('Method').agg('median').sort_values(metrics_use).index[::-1]
No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
  cur_m = pd_df_data.groupby('Method').agg('median').sort_values(metrics_use).index
No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
  cur_m = pd_df_da