In [None]:
import pandas as pd
import numpy as np
import sys
import os
import scanpy as sp
import anndata as an
from sklearn.metrics import silhouette_score

In [None]:
# Tissue names; each one is used verbatim as a file-name suffix and dict key below.
tissues = [
    'bladder',
    'brain',
    'diaphragm',
    'fat BAT',
    'fat GAT',
    'fat MAT',
    'fat SCAT',
    'heart',
    'kidney',
    'large intestine',
    'limb muscle',
    'liver',
    'lung',
    'marrow',
    'pancreas',
    'skin',
    'spleen',
    'thymus',
    'tongue',
    'trachea',
]

In [None]:
# Directory that holds every per-tissue input and output file of this notebook.
folder = '../pb_results/'

# Per-tissue AnnData objects keyed by tissue name, one dict per file-name stem.
tissue_data_red = {}
tissue_data_redo = {}
for tissue in tissues:
    print(tissue, end=' ')  # progress indicator: one name per tissue on a single line
    for store, stem in (
        (tissue_data_red, 'tissue_data_red_'),
        (tissue_data_redo, 'tissue_data_redo_'),
    ):
        store[tissue] = an.read_h5ad(f'{folder}{stem}{tissue}.h5ad')

### Generating input for LISI calculation in R

In [None]:
# For every tissue, write the batch annotations and the UMAP coordinates of both
# datasets to disk. Files from tissue_data_redo carry an extra 'o' in their name.
batch_columns = ['mouse_id', 'data', 'condition']
for tissue in tissues:
    adata_red = tissue_data_red[tissue]
    adata_redo = tissue_data_redo[tissue]

    # Batch annotation tables (header row, no index column).
    adata_red.obs[batch_columns].to_csv(f'{folder}batch_{tissue}.csv', index=False)
    adata_redo.obs[batch_columns].to_csv(f'{folder}batcho_{tissue}.csv', index=False)

    # UMAP embeddings, written with np.savetxt's default formatting.
    np.savetxt(f'{folder}umap_{tissue}.csv', adata_red.obsm['X_umap'])
    np.savetxt(f'{folder}umapo_{tissue}.csv', adata_redo.obsm['X_umap'])

### Reading LISI results

In [None]:
# Load the per-tissue LISI tables and attach their columns to the obs table of
# tissue_data_red: 'lisi_*.csv' -> 'lisi_corr_<col>', 'lisio_*.csv' -> 'lisi_ori_<col>'.
lisi = {}
lisio = {}
for tissue in tissues:
    lisi_corr_table = pd.read_csv(f'{folder}lisi_{tissue}.csv')
    lisi_ori_table = pd.read_csv(f'{folder}lisio_{tissue}.csv')
    lisi[tissue] = lisi_corr_table
    lisio[tissue] = lisi_ori_table

    adata = tissue_data_red[tissue]
    for col in ['mouse_id', 'data', 'condition']:
        # np.array drops the CSV's index, so values are assigned by row position.
        adata.obs['lisi_ori_' + col] = np.array(lisi_ori_table[col])
        adata.obs['lisi_corr_' + col] = np.array(lisi_corr_table[col])

### Silhouette score calculation

In [None]:
x = []  # not read anywhere in this cell
# keys[i] labels the dataset in typs[i]: 'ori' <-> tissue_data_redo, 'corr' <-> tissue_data_red.
keys = ['ori', 'corr']
typs = [tissue_data_redo, tissue_data_red]
for tissue in tissues:
    for key, dataset in zip(keys, typs):
        adata = dataset[tissue]
        embedding = adata.obsm['X_umap']
        clusters = np.array(adata.obs['leiden'])
        # One scalar per (tissue, dataset); assigning it to an obs column repeats it on every row.
        # NOTE(review): the column prefix is 'ari_' although the stored value is the
        # silhouette score - confirm downstream consumers before renaming.
        tissue_data_red[tissue].obs['ari_' + key] = silhouette_score(embedding, clusters)

### Entropy batch mixing calculation

In [None]:
from scipy.stats import entropy

# Entropy of batch mixing: for every cell, take the cells it is connected to in
# the neighbor graph, count how many fall into each category of a batch
# variable, and store the entropy of those counts. Results are written to
# tissue_data_red[tissue].obs as 'ent_<ori|corr>_<batch>' for both datasets.
# Relies on `keys` and `typs` from the silhouette cell above.
batches = ['data', 'mouse_id', 'condition']
for tissue in tissues:
    for key, typ in zip(keys, typs):
        d = typ[tissue]

        # Use the original location of the graph when present; otherwise fall
        # back to .obsp, where newer scanpy versions store it.
        # NOTE(review): fallback assumes uns['neighbors'] is a dict that may
        # carry 'connectivities_key' - confirm against the scanpy version used.
        neighbors = d.uns['neighbors']
        if 'connectivities' in neighbors:
            n = neighbors['connectivities']
        else:
            n = d.obsp[neighbors.get('connectivities_key', 'connectivities')]

        ent = {batch: [] for batch in batches}
        # Hoist the obs column lookups out of the per-cell loop.
        batch_labels = {batch: d.obs[batch] for batch in batches}
        for ii in range(n.shape[0]):
            # Column positions of the non-zero entries in row ii.
            x = n.getrow(ii).nonzero()[1]
            for batch in batches:
                # x holds integer positions, not obs labels, so index with
                # .iloc. Plain [] would rely on pandas' deprecated positional
                # fallback and would pick the wrong rows on an integer index.
                b = batch_labels[batch].iloc[x].value_counts()
                # scipy's entropy normalises the counts to probabilities.
                ent[batch].append(entropy(b))

        for batch in batches:
            tissue_data_red[tissue].obs['ent_' + key + '_' + batch] = ent[batch]

In [None]:
# Persist tissue_data_red (now carrying the added obs columns) for every tissue.
# The target path is the same 'tissue_data_red_<tissue>.h5ad' file that the
# loading cell reads, so the input file is overwritten.
for tissue in tissues:
    print(tissue, end=' ')  # progress indicator
    out_path = f'{folder}tissue_data_red_{tissue}.h5ad'
    tissue_data_red[tissue].write(out_path)