# Analysis of scRNA-seq immune cells from adipose tissue

## Table of contents:

* <a href=#Load>Load Packages and Set Global Variables</a>
    * <a href=#imports>Imports and Settings</a>
    * <a href=#Global>Global Variables</a> 
* <a href=#Dataloading>Loading Data, Quality Control and Preprocessing</a>
    * <a href=#Counts>Gene numbers and counts with and without mitochondrial RNA</a>
* <a href=#Allcells>All cells - normalization, projection and clustering</a>
* <a href=#Define>Define Cell Types</a>
* <a href=#astrocytes>Astrocytes Only</a>
    * <a href=#Embedding>Embeddings and Clustering</a>
    * <a href=#adipmarkers>Astrocyte Marker Analysis</a>
    * <a href=#topde>Top ranking DE Genes</a>
    * <a href=#count_dist>Count distribution for Gfap, Aldh1l1 and Slc1a3</a>
* <a href=#traject>Gfap and Aldh1l1 only</a>

# Load Packages and Set Global Variables

<a id="imports"></a>

## Imports and Settings

In [1]:
import numpy as np
import scanpy as sc
import scipy as sci
import scipy.sparse
import pandas as pd
import seaborn as sb
import scvelo as scv
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import colors
from gprofiler import GProfiler
import custom_functions as cf
from matplotlib_venn import venn3_unweighted
from scipy import stats
import pingouin as pg
import matplotlib_venn
import statistics
import gseapy
import sys
import re
import os

import batchglm
import diffxpy.api as de

import warnings
warnings.filterwarnings('ignore')

%load_ext autoreload
%autoreload 2
sc.settings.verbosity=3 # amount of output

base_dir='/Users/viktorian.miok/Documents/consultation/Xiaocheng/'
dat_start='/Users/viktorian.miok/Documents/consultation/Altun-Ussar/data/'
dat_end='/count_matrices/filtered_gene_bc_matrices/mm10_ensrel94/'
dir_out='/Users/viktorian.miok/Documents/consultation/Xiaocheng/results/'
dir_tables=dir_out+'tables/'
sc_settings_figdir=dir_out+'figures/'
sc_settings_writedir=dir_out+'anndata/'
sc.logging.print_versions()
os.chdir(dir_out)
sc.settings.set_figure_params(dpi=80, scanpy=True)
print (sys.version)



-----
anndata     0.7.5
scanpy      1.7.1
sinfo       0.3.1
-----
PIL                 8.1.2
PyObjCTools         NA
anndata             0.7.5
appdirs             1.4.4
appnope             0.1.2
autoreload          NA
backcall            0.2.0
batchglm            v0.7.4
bioservices         1.7.11
bs4                 4.9.3
certifi             2020.12.05
cffi                1.14.5
chardet             4.0.0
cloudpickle         1.6.0
colorama            0.4.4
colorlog            NA
custom_functions    NA
cycler              0.10.0
cython_runtime      NA
dask                2021.03.0
dateutil            2.8.1
decorator           4.4.2
diffxpy             v0.7.4
docutils            0.16
easydev             0.11.0
get_version         2.1
gprofiler           1.0.0
gseapy              0.10.4
h5py                3.2.1
idna                2.10
igraph              0.9.0
ipykernel           5.4.3
ipython_genutils    0.2.0
ipywidgets          7.6.3
jedi                0.17.2
joblib              1.0.1


In [2]:
# Colour map for gene expression: 20 steps from a narrow slice of the reversed
# "Greys" map come first, followed by 128 steps of the full "Reds" map.
colors3=plt.cm.Greys_r(np.linspace(0.7, 0.8, 20))
colors2=plt.cm.Reds(np.linspace(0, 1, 128))
colorsComb=np.concatenate((colors3, colors2), axis=0)
mymap=colors.LinearSegmentedColormap.from_list('my_colormap', colorsComb)

# Global figure style with a larger font.
sc.set_figure_params(scanpy=True, fontsize=17)

## Global Variables

All embeddings and clusterings can be saved and loaded into this script. Be careful with overwriting cluster caches once cell type annotation has started, as cluster labels may be shuffled.

Set whether anndata objects are recomputed or loaded from cache.

In [3]:
# True: rebuild the AnnData objects and write them to disk; False: read the cached .h5ad files.
bool_recomp=False

Set whether clustering is recomputed or loaded from saved .obs file. Loading makes sense if the clustering changes due to a change in scanpy or one of its dependencies and the number of clusters or the cluster labels change accordingly.

In [4]:
# True: re-run Leiden clustering and write the obs table to CSV; False: read the labels from that CSV.
# Only consulted inside the `bool_recomp` branches of the processing cells.
bool_recluster=False

Set whether cluster cache is overwritten. Note that the cache exists for reproducibility of clustering, see above.

In [5]:
# NOTE(review): no cell visible in this part of the notebook reads this flag; the
# cluster CSVs are written whenever bool_recluster is True — confirm the intent.
bool_write_cluster_cache=False

Set whether to produce plots, set to False for test runs.

In [6]:
# Guards the plotting cells (`if bool_plot ...`); False skips them.
bool_plot=False

Set whether to produce only the currently interesting plots.

In [7]:
# NOTE(review): not referenced by any cell visible in this part of the notebook.
bool_plot_current=True

Set whether observations should be calculated. If False, it is necessary to read a cached file that contains the necessary information. It then shows the distributions of counts and genes, as well as mt_frac, after filtering. 
Set to True in order to see the data before filtering and follow the decisions for cutoffs.

In [8]:
# NOTE(review): not referenced by any cell visible in this part of the notebook.
bool_create_observations=True

<a id="Dataloading"></a>

# Loading Data, Quality Control and Preprocessing

Read the data in:

In [9]:
if bool_recomp:
    # One row per 10x sample: (directory name, batch label, tissue, age group).
    # The row order matters: `concatenate` appends '-<position>' to every
    # barcode, and that suffix is what the labels below are matched on.
    samples=[('MUC8387',  'BAT-2', 'BAT', 'young'),
             ('MUC8388',  'BAT-8', 'BAT', 'old'),
             ('MUC8389',  'PGF-2', 'PGF', 'young'),
             ('MUC8390',  'PGF-8', 'PGF', 'old'),
             ('PreYoung', 'SCF-2', 'SCF', 'young'),
             ('PreOld',   'SCF-8', 'SCF', 'old'),
    ]

    # Read every sample with gene symbols as variable names.
    adatas=[sc.read_10x_mtx(path=dat_start+sample_dir+dat_end,
                            var_names='gene_symbols',
                            cache=True)
            for sample_dir, _, _, _ in samples]
    adata_raw=adatas[0].concatenate(*adatas[1:])
    del adatas  # the per-sample objects are not needed after merging

    # Boolean mask per sample, derived from the barcode suffix.
    in_sample=[adata_raw.obs_names.str.endswith('-{:d}'.format(i))
               for i in range(len(samples))]
    adata_raw.obs['batch_type']=np.select(in_sample, [s[1] for s in samples])
    adata_raw.obs['tissue_type']=np.select(in_sample, [s[2] for s in samples])
    adata_raw.obs['age_type']=np.select(in_sample, [s[3] for s in samples])

    adata_raw.var_names_make_unique()
    sc.write(sc_settings_writedir+'adata_raw.h5ad', adata_raw)
else:
    # Re-use the merged object written by a previous run.
    adata_raw=sc.read(sc_settings_writedir+'adata_raw.h5ad')

Summary of steps performed here: Only cells with at least 1500 UMIs are kept (see the filtering cell below). Counts per cell are normalized for library depth. The gene (feature) space is reduced with PCA to 50 PCs. A nearest neighbour graph and UMAP are computed based on the PC space. Cells are clustered with Leiden clustering based on the nearest neighbour graph. Graph abstraction (PAGA) is computed based on the Leiden clustering.

In [10]:
# Drop barcodes without any counts (modifies adata_raw in place) so that the
# mitochondrial fraction computed later never divides by zero.
sc.pp.filter_cells(adata_raw, min_counts=1)

The data contains 21143 observations with 31253 different genes. Due to dropouts, some of the observations might not show any counts or genes. In order to calculate the fraction of mitochondrial RNA in the next steps, every observation without counts must be filtered out to prevent NaN values from emerging. 

In [11]:
# Size of the merged data set, followed by the number of cells per tissue
# (the last expression is rendered as the cell output).
print(f'Number of cells: {adata_raw.n_obs:d}')
print(f'Number of genes: {adata_raw.n_vars:d}')
print('Number of cells per tissue_type:')
adata_raw.obs['tissue_type'].value_counts().sort_index()

Number of cells: 24733
Number of genes: 31125
Number of cells per tissue_type:


BAT    6472
PGF    8917
SCF    9344
Name: tissue_type, dtype: int64

### Gene numbers and counts with and without mitochondrial RNA

Create necessary obs:

In [12]:
# Work on a copy so that adata_raw keeps the unfiltered data.
adata_qc=adata_raw.copy()

# Per-cell QC metrics. On a sparse matrix `.sum(1)` returns an (n_cells, 1)
# np.matrix, so every result is flattened to 1-D before it goes into `.obs`.
adata_qc.obs['n_genes']=np.asarray((adata_qc.X > 0).sum(1)).ravel()
adata_qc.obs['n_counts']=np.asarray(adata_qc.X.sum(1)).ravel()

# Mitochondrial genes are selected by the 'mt-' gene symbol prefix.
mt_gene_mask=[gene.startswith('mt-') for gene in adata_qc.var_names]
temp_mt_sum=np.asarray(adata_qc[:,mt_gene_mask].X.sum(1)).ravel()

# Fraction of each cell's counts that comes from mitochondrial genes.
adata_qc.obs['mt_frac']=temp_mt_sum/adata_qc.obs['n_counts']

Plot n_counts and mt_frac:

In [13]:
if bool_plot:
    # One violin panel per QC metric, linear scale.
    t1 = sc.pl.violin(adata_qc, ['n_counts', 'n_genes', 'mt_frac'],
                      multi_panel=True, log=False, jitter=3, size=1)

In [14]:
if bool_plot:
    # The 20 genes with the highest expression.
    sc.pl.highest_expr_genes(adata_qc, n_top=20)

Overall, the data contains a lot of observations with high fractions of mitochondrial RNA. Additionally, most observations show counts below 100, suggesting poor data quality. To further investigate the distribution of counts over genes per observation, scatterplots are created:

### Number of Genes versus Number of Counts

In [15]:
if bool_plot:
    # Genes detected versus total counts, coloured by mitochondrial fraction:
    # first for all cells, then zoomed in on cells below 5000 counts.
    p1 = sc.pl.scatter(adata_qc,
                       x='n_counts', y='n_genes', color='mt_frac', size=5)
    p2 = sc.pl.scatter(adata_qc[adata_qc.obs['n_counts'].lt(5000)],
                       x='n_counts', y='n_genes', color='mt_frac', size=5)

### Distribution of Counts and Genes

For the remaining observations, the fraction of mitochondrial RNA is generally very low and at most 20%

In [16]:
if bool_plot:
    # Histogram of total counts per cell: full range first ...
    p6 = sb.distplot(adata_qc.obs['n_counts'], kde=False)
    plt.show()
    # ... then only the cells below 5000 counts.
    p7 = sb.distplot(adata_qc.obs.loc[adata_qc.obs['n_counts'] < 5000, 'n_counts'],
                     kde=False)
    plt.show()

In [17]:
if bool_plot:
    # Histogram of detected genes per cell (60 bins): full range first ...
    p9 = sb.distplot(adata_qc.obs['n_genes'], bins=60, kde=False)
    plt.show()
    # ... then only the cells with fewer than 500 detected genes.
    p10 = sb.distplot(adata_qc.obs.loc[adata_qc.obs['n_genes'] < 500, 'n_genes'],
                      bins=60, kde=False)
    plt.show()

### Filtering

In [18]:
# Filter cells according to identified QC thresholds:
print('Total number of cells: {:d}'.format(adata_qc.n_obs))

# Lower bound on total counts per cell.
sc.pp.filter_cells(adata_qc,
                   min_counts=1500
)
print('Number of cells after min count filter: {:d}'.format(adata_qc.n_obs))

# Upper bound on total counts per cell.
sc.pp.filter_cells(adata_qc,
                   max_counts=25000
)
print('Number of cells after max count filter: {:d}'.format(adata_qc.n_obs))

# Keep cells with less than 20% mitochondrial counts. Boolean indexing returns
# a view of the parent object; copy it so that the in-place filter below works
# on a real AnnData instead of triggering an implicit copy of the view.
adata_qc = adata_qc[adata_qc.obs['mt_frac'] < 0.2].copy()
print('Number of cells after MT filter: {:d}'.format(adata_qc.n_obs))

# Lower bound on the number of detected genes per cell.
sc.pp.filter_cells(adata_qc, 
                   min_genes=300
)
print('Number of cells after gene filter: {:d}'.format(adata_qc.n_obs))

filtered out 701 cells that have less than 1500 counts


Total number of cells: 24733


filtered out 66 cells that have more than 25000 counts


Number of cells after min count filter: 24032
Number of cells after max count filter: 23966
Number of cells after MT filter: 23939


filtered out 1279 cells that have less than 300 genes expressed
Trying to set attribute `.obs` of view, copying.


Number of cells after gene filter: 22660


In [19]:
# Filter genes:
print(f'Total number of genes: {adata_qc.n_vars:d}')

# Keep genes detected in at least 30 cells (this also drops genes without counts).
sc.pp.filter_genes(adata_qc, min_cells=30)
print(f'Number of genes after cell filter: {adata_qc.n_vars:d}')

Total number of genes: 31125


filtered out 14148 genes that are detected in less than 30 cells


Number of genes after cell filter: 16977


In [20]:
if bool_plot:
    # Same scatter plots as before filtering, now on the filtered cells:
    # all of them first, then those below 5000 counts.
    p1 = sc.pl.scatter(adata_qc,
                       x='n_counts', y='n_genes', color='mt_frac', size=5)
    p3 = sc.pl.scatter(adata_qc[adata_qc.obs['n_counts'].lt(5000)],
                       x='n_counts', y='n_genes', color='mt_frac', size=5)

In [21]:
# Size of the filtered data set and its split by sample. The table shown is
# grouped by `batch_type`, so the printed heading names that column.
print('Number of cells: {:d}'.format(adata_qc.n_obs))
print('Number of genes: {:d}'.format(adata_qc.shape[1]))
print('Number of cells per batch_type:')
adata_qc.obs['batch_type'].value_counts().sort_index()

Number of cells: 22660
Number of genes: 16977
Number of cells per tissue_type:


BAT-2    2658
BAT-8    2343
PGF-2    6303
PGF-8    2128
SCF-2    3892
SCF-8    5336
Name: batch_type, dtype: int64

## All cells - normalization, projection and clustering

Remove BAT cells and perform the standard analysis

In [22]:
if bool_recomp:
    # Remove the brown adipose tissue (BAT) cells.
    adata_qc=adata_qc[adata_qc.obs['tissue_type'] != 'BAT',:]
    adata_proc=adata_qc.copy()
    # Keep the un-normalised matrix accessible through `.raw`.
    adata_proc.raw=adata_qc

    # Normalise, log-transform and correct for the `batch` covariate.
    sc.pp.normalize_per_cell(adata_proc)
    sc.pp.log1p(adata_proc)
    sc.pp.combat(adata_proc, 
                 key='batch'
    )
    sc.pp.highly_variable_genes(adata_proc,
                                flavor='cell_ranger',
                                n_top_genes=4000
    )
    sc.pl.highly_variable_genes(adata_proc)

    # PCA on the highly variable genes, then neighbour graph and UMAP.
    sc.pp.pca(adata_proc,
              n_comps=50,
              random_state=0, 
              use_highly_variable=True,
              svd_solver='arpack'
    )
    sc.pp.neighbors(adata_proc,
                    n_neighbors=100,
                    knn=True, 
                    method='umap',
                    n_pcs=50, 
                    random_state=0
    )
    sc.tl.umap(adata_proc)

    if bool_recluster:
        sc.tl.leiden(adata_proc,
                     resolution=0.5
        )
        # Cache the obs table (barcodes are written as the first CSV column).
        adata_proc.obs.to_csv(sc_settings_writedir+"obs_adata_proc.csv")
    else:
        # Restore the cached labels *by barcode*. Assigning a freshly built
        # pd.Series would be aligned on its 0..n-1 index against the barcode
        # index of `.obs` and yield only NaN. Labels are stored as strings
        # because later cells select clusters with '0', '1', ...
        obs=pd.read_csv(sc_settings_writedir+'obs_adata_proc.csv', index_col=0)
        cached_labels=obs.loc[adata_proc.obs_names, 'leiden']
        adata_proc.obs['leiden']=pd.Categorical(
            cached_labels.astype(str).values,
            categories=[str(label) for label in sorted(cached_labels.unique())]
        )
    sc.write(sc_settings_writedir+'adata_proc.h5ad', adata_proc)
else:
    adata_proc=sc.read(sc_settings_writedir+'adata_proc.h5ad') 
# PAGA is recomputed on every run, for cached objects as well.
sc.tl.paga(adata_proc)

running PAGA
    finished: added
    'paga/connectivities', connectivities adjacency (adata.uns)
    'paga/connectivities_tree', connectivities subtree (adata.uns) (0:00:01)


In [23]:
# Number of cells per sample in adata_proc.
adata_proc.obs['batch_type'].value_counts()

PGF-2    6303
SCF-8    5336
SCF-2    3892
PGF-8    2128
Name: batch_type, dtype: int64

Produce some summarizing plots that show the global characteristics of the data.

In [24]:
if bool_plot:
    # UMAP coloured by Pdgfra expression (gene noted by the author: A530016L24Rik).
    plt.rcParams.update({'figure.figsize': [10,10]})
    cf.plot_umap_marker(adata_proc, ['Pdgfra'],
                        color_map=mymap,
                        size=50,
                        use_raw=False,
                        save="_pgf-scf_cells_Pdgfra")

In [25]:
if bool_plot:
    # UMAP coloured by Pecam1 expression. Author's note: cells in the
    # cluster 13 need to be removed (Endothelial).
    plt.rcParams.update({'figure.figsize': [10,10]})
    cf.plot_umap_marker(adata_proc, ['Pecam1'],
                        color_map=mymap,
                        size=50,
                        use_raw=False,
                        save="_pgf-scf_cells_Pecam1")

In [26]:
if bool_plot:
    # UMAP coloured by Leiden cluster, with the labels drawn on the embedding.
    plt.rcParams.update({'figure.figsize': [10,10]})
    cf.plot_umap_marker(adata_proc, ['leiden'],
                        legend_loc='on data',
                        size=50,
                        use_raw=False,
                        save="_pgf-scf_cells_leiden_clustering")

Number of cells in each cluster:

In [27]:
# Number of cells in each Leiden cluster of adata_proc.
adata_proc.obs["leiden"].value_counts()

0     2088
1     1970
2     1732
3     1519
4     1378
5     1240
6     1216
7      864
8      765
9      759
10     758
11     632
12     516
13     480
14     468
15     415
16     274
17     161
18     147
19     114
20      98
21      65
Name: leiden, dtype: int64

In [28]:
# Marker dot plot for all PGF/SCF cells, grouped by Leiden cluster.
if bool_plot:
    # Marker panel per candidate cell type (dict order = order in the plot).
    marker_genes_dict={
        'Immune cell': ['Ptprc'],
        'Macrophage': ['Cd68','Ccl6','Adgre1','Cd14','C1qc','Lyz2'],
        'Monocyte': ['Csf1r','Ly6c2','Lyz1','Fn1','Spn','Ccr2'],
        'cDc': ['Itgax',],
        'cDc1': ['Irf8','Batf3','Xcr1','Cadm1'],
        'cDc2': ['Cd209a','Irf4','Mgl2'],
        'Mast cell': ['Fcer1a','Kit','Il1rl1'],
        'Granulocyte': ['Itgam'],
        'Neutrophil': ['Ceacam1','Cxcr2','Cxcr4','Sell','Cd63'], # ,'Ly6g'
        'T Cell': ['Cd3g','Cd3d','Cd3e'],
        'CD4+ T Cell': ['Cd4','Sell'],
        'CD8+ T Cell': ['Cd8a','Cd8b1'],
        'B Cell': ['Cd19','Cd22','Ms4a1','Cd79a','Cd79b'],
        'NK Cell': ['Klrd1','Klrc1','Ncr1','Il2rb','Klrb1c','Klrk1'],
        'ILCs': ['Il7r','Cd200r1','Ccr6'],
    }
    # Alphabetical gene order within every panel.
    marker_genes_dict={cell_type: sorted(genes)
                       for cell_type, genes in marker_genes_dict.items()}

    sc.pl.dotplot(adata=adata_proc,
                  var_names=marker_genes_dict,
                  groupby='leiden',
                  dendrogram=True,
                  use_raw=True,
                  log=False,
                  var_group_rotation=90,
                  save="pgf-scf_cells_celltypes_markers.pdf",
                  show=True)

# Immune cells

## Embedding and Clustering

Remove preadipocytes, which are present in clusters 3, 7, 11, 5, 6 and 13 (cluster 13 also contains endothelial cells).
Also remove the other non-immune (Ptprc-) cells from clusters 12 and 18.

In [29]:
if bool_recomp:
    # Leiden clusters of adata_proc that are kept as immune cells
    # (excluded: '3','7','11','5','6','13','12','18').
    immune_clusters=['0','1','2','4','8','9','10','14','15','16','17','19','20','21']
    cell_ids_imuno=np.asarray(adata_proc.obs_names)[
        adata_proc.obs['leiden'].isin(immune_clusters).values
    ]
    # Subset the QC-filtered object, i.e. restart from un-normalised data.
    adata_imuno=adata_qc[cell_ids_imuno,:].copy()

    # Recompute the QC metrics for the subset; matrix row sums are flattened
    # to 1-D before they are stored in `.obs`.
    adata_imuno.obs['n_genes']=np.asarray((adata_imuno.X > 0).sum(1)).ravel()
    adata_imuno.obs['n_counts']=np.asarray(adata_imuno.X.sum(1)).ravel()
    mt_gene_mask=[gene.startswith('mt-') for gene in adata_imuno.var_names]
    temp_mt_sum=np.asarray(adata_imuno[:,mt_gene_mask].X.sum(1)).ravel()
    adata_imuno.obs['mt_frac']=temp_mt_sum/adata_imuno.obs['n_counts']

    # Snapshot of the un-normalised matrix, then normalise and log-transform.
    adata_imuno.raw=adata_imuno
    sc.pp.normalize_per_cell(adata_imuno)
    sc.pp.log1p(adata_imuno)
    sc.pp.highly_variable_genes(adata_imuno, n_top_genes=4000)
    adata_imuno.X=adata_imuno.X.toarray()
    
    sc.pp.pca(adata_imuno,
              n_comps=50,
              use_highly_variable = True,
              random_state=0, 
              svd_solver='arpack'
    )
    sc.pp.neighbors(adata_imuno,
                    n_neighbors=100,
                    knn=True, 
                    method='umap',
                    n_pcs=50, 
                    random_state=0
    )
    sc.tl.umap(adata_imuno)

    if bool_recluster:
        sc.tl.leiden(adata_imuno, resolution=0.5)
        adata_imuno.obs.to_csv(sc_settings_writedir+'obs_adata_imuno.csv')
    else:
        # Restore the cached labels by barcode: a plain pd.Series would be
        # aligned on its 0..n-1 index against the barcode index and give NaN.
        # Labels are kept as strings, as the cluster selections expect.
        obs=pd.read_csv(sc_settings_writedir+'obs_adata_imuno.csv', index_col=0)
        cached_labels=obs.loc[adata_imuno.obs_names, 'leiden']
        adata_imuno.obs['leiden']=pd.Categorical(
            cached_labels.astype(str).values,
            categories=[str(label) for label in sorted(cached_labels.unique())]
        )
    sc.write(sc_settings_writedir+'adata_imuno.h5ad', adata_imuno)
else:
    adata_imuno=sc.read(sc_settings_writedir+'adata_imuno.h5ad') 
# PAGA is recomputed on every run, for cached objects as well.
sc.tl.paga(adata_imuno)

running PAGA
    finished: added
    'paga/connectivities', connectivities adjacency (adata.uns)
    'paga/connectivities_tree', connectivities subtree (adata.uns) (0:00:00)


In [30]:
if bool_plot:
    # UMAP of the immune subset coloured by Leiden cluster.
    cf.plot_umap_marker(adata_imuno, ['leiden'],
                        legend_loc='on data',
                        size=40,
                        use_raw=False,
                        save="_immuno_cells_leiden")

In [31]:
if bool_plot:
    # PAGA graph of the immune subset.
    plt.rcParams.update({'figure.figsize': [10,10]})
    sc.pl.paga(adata_imuno,
               edge_width_scale=2,
               node_size_scale=10,
               save="_immuno_cells_leiden")

## Define Cell Types

<a id="DE"></a>

### Summary heatmap, dotplot and stacked_violin for cluster assignments

In [32]:
# Marker dot plot for the immune subset, grouped by Leiden cluster.
if bool_plot:
    # Marker panel per candidate cell type (dict order = order in the plot).
    marker_genes_dict={
        'Immune cell': ['Ptprc'],
        'Macrophage': ['Adgre1','Cd14','C1qc','Mrc1','Pf4','Lyve1'],
        'Monocyte': ['Csf1r','Lyz2','Lyz1','Fn1','Ear2','Ccr2','Fcgr4','Csf3r'],
        'cDc': ['Itgax',],
        'cDc1': ['Irf8','Batf3','Xcr1','Cadm1'],
        'cDc2': ['Cd209a','Irf4','Mgl2','Clec10a','Ccr7','Fscn1'],
        'Mast cell': ['Fcer1a','Kit','Il1rl1'],
        'Myeloid cells': ['Itgam'],
        'Neutrophil': ['S100a8','Cxcr2','Cxcr4','Sell','S100a9','Ly6g'],
        'T Cell': ['Cd3g','Cd3d','Cd3e'],
        'CD4+ T Cell': ['Cd4','Sell','Cd40lg','Cd2'],
        'CD8+ T Cell': ['Cd8a','Cd8b1','Gzmk','Gzmb','Lef1'],
        'B Cell': ['Cd19','Cd22','Ms4a1','Cd79a','Cd79b','Ly6d'],
        'NK Cell': ['Klrd1','Klrc1','Ncr1','Il2rb','Klrb1c','Klrk1'],
        'ILCs': ['Il7r','Cd200r1','Gata3'],
    }
    # Alphabetical gene order within every panel.
    marker_genes_dict={cell_type: sorted(genes)
                       for cell_type, genes in marker_genes_dict.items()}

    sc.pl.dotplot(adata=adata_imuno,
                  var_names=marker_genes_dict,
                  groupby='leiden',
                  dendrogram=True,
                  use_raw=True,
                  log=False,
                  var_group_rotation=90,
                  save="immuno_cells_celltypes_markers.pdf",
                  show=True)

## Differential gene expression - immuno cells - clusters

In [33]:
if bool_recomp:
    # Rank genes for every Leiden cluster with a t-test on adata_imuno.X.
    sc.tl.rank_genes_groups(adata_imuno,
                            groupby='leiden',
                            method='t-test',
                            use_raw=False)

    # Build one table with three columns per cluster: '<cluster>_n' (gene
    # names), '<cluster>_l' (log fold changes) and '<cluster>_p' (adjusted
    # p-values).
    result = adata_imuno.uns['rank_genes_groups']
    groups = result['names'].dtype.names
    table_columns = {}
    for group in groups:
        for key in ['names', 'logfoldchanges', 'pvals_adj']:
            table_columns[group + '_' + key[:1]] = result[key][group]
    df = pd.DataFrame(table_columns)
    df.to_csv("DGE_imuno_clusters.csv")

## UMAP with assigned cell types

In [34]:
# Assign a cell type name to every Leiden cluster of the immune subset and
# plot the annotated UMAP.
# NOTE(review): the 'celltypes' column is only created when bool_plot is True.
if bool_plot:
    new_cluster_names = {
        '0': "B cells",
        '1': "Macrophages",
        '2': "Monocytes",
        '3': "T cells",
        '4': "T cells",
        '5': "Monocytes",
        '6': "NK cells",
        '7': "cDC2Bs",
        '8': "cDC1s",
        '9': "ILCs",
        '10': "Macrophages",
        '11': "T cells",
        '12': "Mast cells",
        '13': "cDC2As",
        '14': "Neutrophils",
    }
    # Look up the name of every cell's cluster; an unmapped label raises KeyError.
    adata_imuno.obs['celltypes'] = list(
        map(new_cluster_names.__getitem__, adata_imuno.obs['leiden'])
    )
    plt.rcParams.update({'figure.figsize': [10,10]})
    cf.plot_umap_marker(adata_imuno, ['celltypes'],
                        title='',
                        size=50,
                        frameon=True,
                        use_raw=False,
                        save="_immuno_cells_celltypes.png")

# WAT monocytes and macrophages

## Embedding and Clustering

In [35]:
if bool_recomp:
    # Leiden clusters of adata_imuno selected for this subset.
    mm_clusters=['1','2','5','10']
    cell_ids_mm=np.asarray(adata_imuno.obs_names)[
        adata_imuno.obs['leiden'].isin(mm_clusters).values
    ]
    # Start from the matrix stored in adata_imuno.raw. adata_imuno.X is already
    # normalised and log-transformed, so subsetting it directly would run
    # normalize_per_cell/log1p a second time and put log values into `.raw`
    # and into the n_counts/mt_frac columns.
    # NOTE(review): an existing obs_adata_mm.csv was produced from the doubly
    # transformed data; re-run once with bool_recluster=True.
    adata_mm=adata_imuno.raw.to_adata()[cell_ids_mm,:].copy()

    # QC metrics of the subset; matrix row sums are flattened to 1-D.
    adata_mm.obs['n_genes']=np.asarray((adata_mm.X > 0).sum(1)).ravel()
    adata_mm.obs['n_counts']=np.asarray(adata_mm.X.sum(1)).ravel()
    mt_gene_mask=[gene.startswith('mt-') for gene in adata_mm.var_names]
    temp_mt_sum=np.asarray(adata_mm[:,mt_gene_mask].X.sum(1)).ravel()
    adata_mm.obs['mt_frac']=temp_mt_sum/adata_mm.obs['n_counts']

    # Snapshot of the un-normalised matrix, then normalise and log-transform.
    adata_mm.raw=adata_mm
    sc.pp.normalize_per_cell(adata_mm)
    sc.pp.log1p(adata_mm)
    sc.pp.highly_variable_genes(adata_mm, n_top_genes=4000)
    
    sc.pp.pca(adata_mm,
              n_comps=50,
              use_highly_variable = True,
              random_state=0, 
              svd_solver='arpack'
    )
    sc.pp.neighbors(adata_mm,
                    n_neighbors=100,
                    knn=True, 
                    method='umap',
                    n_pcs=50, 
                    random_state=0
    )
    sc.tl.umap(adata_mm)

    if bool_recluster:
        sc.tl.leiden(adata_mm, 
                     resolution=0.5, 
                     key_added='leiden_mm'
        )
        adata_mm.obs.to_csv(sc_settings_writedir+'obs_adata_mm.csv')
    else:
        # Restore the cached labels by barcode: a plain pd.Series would be
        # aligned on its 0..n-1 index against the barcode index and give NaN.
        obs=pd.read_csv(sc_settings_writedir+'obs_adata_mm.csv', index_col=0)
        cached_labels=obs.loc[adata_mm.obs_names, 'leiden_mm']
        adata_mm.obs['leiden_mm']=pd.Categorical(
            cached_labels.astype(str).values,
            categories=[str(label) for label in sorted(cached_labels.unique())]
        )
    sc.write(sc_settings_writedir+'adata_mm.h5ad', adata_mm)
else:
    adata_mm=sc.read(sc_settings_writedir+'adata_mm.h5ad') 
# PAGA is recomputed on every run, for cached objects as well.
sc.tl.paga(adata_mm)

running PAGA
    finished: added
    'paga/connectivities', connectivities adjacency (adata.uns)
    'paga/connectivities_tree', connectivities subtree (adata.uns) (0:00:00)


In [36]:
if bool_plot:
    # UMAP of the monocyte/macrophage subset coloured by its own clustering.
    plt.rcParams.update({'figure.figsize': [10,10]})
    cf.plot_umap_marker(adata_mm, ['leiden_mm'],
                        legend_loc='on data',
                        size=50,
                        use_raw=False,
                        save="_mm_cells_leiden_clustering")

## Cluster cell type - monocytes and macrophages

In [37]:
# Marker dot plot for the monocyte/macrophage subset, grouped by 'leiden_mm'.
if bool_plot:
    marker_genes_dict={
        'Macrophage': ['Adgre1','Cd14','C1qc','Mrc1','Pf4','Lyve1'],
        'Monocyte': ['Csf1r','Lyz2','Lyz1','Fn1','Ear2','Ccr2','Fcgr4','Csf3r'],
    }
    # Alphabetical gene order within every panel.
    marker_genes_dict={cell_type: sorted(genes)
                       for cell_type, genes in marker_genes_dict.items()}

    sc.pl.dotplot(adata=adata_mm,
                  var_names=marker_genes_dict,
                  groupby='leiden_mm',
                  dendrogram=True,
                  use_raw=True,
                  log=False,
                  var_group_rotation=90,
                  save="mm_cells_celltypes_markers.pdf",
                  show=True)

## Differential gene expression - monocytes and macrophages - clusters

In [38]:
if bool_recomp:
    # Rank genes for every 'leiden_mm' cluster with a t-test on adata_mm.X.
    sc.tl.rank_genes_groups(adata_mm,
                            groupby='leiden_mm',
                            method='t-test',
                            use_raw=False)

    # Build one table with three columns per cluster: '<cluster>_n' (gene
    # names), '<cluster>_l' (log fold changes) and '<cluster>_p' (adjusted
    # p-values).
    result = adata_mm.uns['rank_genes_groups']
    groups = result['names'].dtype.names
    table_columns = {}
    for group in groups:
        for key in ['names', 'logfoldchanges', 'pvals_adj']:
            table_columns[group + '_' + key[:1]] = result[key][group]
    df = pd.DataFrame(table_columns)
    df.to_csv("DGE_mm_clusters.csv")

# WAT effector lymphocytes  

## Embedding and Clustering

In [39]:
if bool_recomp:
    # Leiden clusters of adata_imuno selected for this subset.
    lymph_clusters=['3','4','6','9','11']
    cell_ids_lymph=np.asarray(adata_imuno.obs_names)[
        adata_imuno.obs['leiden'].isin(lymph_clusters).values
    ]
    # Start from the matrix stored in adata_imuno.raw. adata_imuno.X is already
    # normalised and log-transformed, so subsetting it directly would run
    # normalize_per_cell/log1p a second time and put log values into `.raw`
    # and into the n_counts/mt_frac columns.
    # NOTE(review): an existing obs_adata_lymph.csv was produced from the
    # doubly transformed data; re-run once with bool_recluster=True.
    adata_lymph=adata_imuno.raw.to_adata()[cell_ids_lymph,:].copy()

    # QC metrics of the subset; matrix row sums are flattened to 1-D.
    adata_lymph.obs['n_genes']=np.asarray((adata_lymph.X > 0).sum(1)).ravel()
    adata_lymph.obs['n_counts']=np.asarray(adata_lymph.X.sum(1)).ravel()
    mt_gene_mask=[gene.startswith('mt-') for gene in adata_lymph.var_names]
    temp_mt_sum=np.asarray(adata_lymph[:,mt_gene_mask].X.sum(1)).ravel()
    adata_lymph.obs['mt_frac']=temp_mt_sum/adata_lymph.obs['n_counts']

    # Snapshot of the un-normalised matrix, then normalise and log-transform.
    adata_lymph.raw=adata_lymph
    sc.pp.normalize_per_cell(adata_lymph)
    sc.pp.log1p(adata_lymph)
    sc.pp.highly_variable_genes(adata_lymph, n_top_genes=4000)
    
    sc.pp.pca(adata_lymph,
              n_comps=50,
              use_highly_variable = True,
              random_state=0, 
              svd_solver='arpack'
    )
    sc.pp.neighbors(adata_lymph,
                    n_neighbors=100,
                    knn=True, 
                    method='umap',
                    n_pcs=50, 
                    random_state=0
    )
    sc.tl.umap(adata_lymph)

    if bool_recluster:
        sc.tl.leiden(adata_lymph, 
                     resolution=0.5,
                     key_added='leiden_lymph')
        adata_lymph.obs.to_csv(sc_settings_writedir+'obs_adata_lymph.csv')
    else:
        # Restore the cached labels by barcode: a plain pd.Series would be
        # aligned on its 0..n-1 index against the barcode index and give NaN.
        obs=pd.read_csv(sc_settings_writedir+'obs_adata_lymph.csv', index_col=0)
        cached_labels=obs.loc[adata_lymph.obs_names, 'leiden_lymph']
        adata_lymph.obs['leiden_lymph']=pd.Categorical(
            cached_labels.astype(str).values,
            categories=[str(label) for label in sorted(cached_labels.unique())]
        )
    sc.write(sc_settings_writedir+'adata_lymph.h5ad', adata_lymph)
else:
    adata_lymph=sc.read(sc_settings_writedir+'adata_lymph.h5ad') 
# PAGA is recomputed on every run, for cached objects as well.
sc.tl.paga(adata_lymph)

running PAGA
    finished: added
    'paga/connectivities', connectivities adjacency (adata.uns)
    'paga/connectivities_tree', connectivities subtree (adata.uns) (0:00:00)


In [40]:
if bool_plot:
    # UMAP of the lymphocyte subset coloured by its own clustering.
    plt.rcParams.update({'figure.figsize': [10,10]})
    cf.plot_umap_marker(adata_lymph, ['leiden_lymph'],
                        legend_loc='on data',
                        size=50,
                        use_raw=False,
                        save="_lymph_cells_leiden_clustering")

## Cluster cell type - effector lymphocytes

In [41]:
# Marker dot plot for the lymphocyte subset, grouped by 'leiden_lymph'.
if bool_plot:
    marker_genes_dict={
        'T Cell': ['Cd3g','Cd3d','Cd3e'],
        'CD4+ T Cell': ['Cd4','Sell','Cd40lg','Cd2'],
        'CD8+ T Cell': ['Cd8a','Cd8b1','Gzmk','Gzmb','Lef1'],
        'NK Cell': ['Klrd1','Klrc1','Ncr1','Il2rb','Klrb1c','Klrk1'],
        'ILCs': ['Il7r','Cd200r1','Gata3'],
    }
    # Alphabetical gene order within every panel.
    marker_genes_dict={cell_type: sorted(genes)
                       for cell_type, genes in marker_genes_dict.items()}

    sc.pl.dotplot(adata=adata_lymph,
                  var_names=marker_genes_dict,
                  groupby='leiden_lymph',
                  dendrogram=True,
                  use_raw=True,
                  log=False,
                  var_group_rotation=90,
                  save="lymph_cells_celltypes_markers.pdf",
                  show=True)

## Differential gene expression - effector lymphocytes - clusters

In [42]:
if bool_recomp:
    # Rank genes for every 'leiden_lymph' cluster with a t-test on adata_lymph.X.
    sc.tl.rank_genes_groups(adata_lymph,
                            groupby='leiden_lymph',
                            method='t-test',
                            use_raw=False)

    # Build one table with three columns per cluster: '<cluster>_n' (gene
    # names), '<cluster>_l' (log fold changes) and '<cluster>_p' (adjusted
    # p-values).
    result = adata_lymph.uns['rank_genes_groups']
    groups = result['names'].dtype.names
    table_columns = {}
    for group in groups:
        for key in ['names', 'logfoldchanges', 'pvals_adj']:
            table_columns[group + '_' + key[:1]] = result[key][group]
    df = pd.DataFrame(table_columns)
    df.to_csv("DGE_lymph_clusters.csv")

In [28]:
# Flag cells by marker co-expression and export the flags (with all other obs columns) for UpSet plots.
if bool_plot:
    def _marker_flag(expressed, absent=()):
        """Return a 0/1 indicator per cell for one marker combination.

        A cell is flagged when every gene in `expressed` has a value > 0.01 in
        adata_proc.X and every gene in `absent` has a value of exactly 0.
        The comparisons are combined with `&` and the combined mask is converted
        to integers once, instead of multiplying only the last term by 1.
        """
        mask = None
        for gene in expressed:
            hit = adata_proc[:, gene].X > 0.01
            mask = hit if mask is None else mask & hit
        for gene in absent:
            mask = mask & (adata_proc[:, gene].X == 0)
        return mask * 1

    # Cell-type indicators: all listed markers must be detected in the same cell.
    celltype_markers = {
        'Astrocytes': ['Gfap', 'Aldh1l1', 'Slc1a3'],
        'Endothelial': ['Cldn5', 'Pecam1', 'Slco1c1'],
        'Ependymal': ['Ccdc153', 'Rarres2', 'Hdc', 'Tm4sf1'],
        'Microglia': ['Itgam', 'Tmem119', 'Cx3cr1', 'Csf1r', 'Aif1', 'P2ry12'],
        'Mural': ['Mustn1', 'Pdgfrb', 'Des'],
        'Neurons': ['Rbfox3', 'Syp', 'Tubb3', 'Snap25', 'Syt1'],
        'Oligodendro': ['Olig1', 'Mog', 'Mag'],
        'Tanycytes': ['Rax', 'Lhx2', 'Col23a1', 'Slc16a2'],
        'VLMCs': ['Lum', 'Col1a1', 'Col3a1'],
    }
    for column, genes in celltype_markers.items():
        adata_proc.obs[column] = _marker_flag(genes)

    # Genes tested for co-expression with Gfap and/or Aldh1l1.
    partner_genes = ['Lepr', 'Insr', 'Igf1r', 'Glp1r', 'Lpl', 'Slc2a1']

    # Gfap-positive, Aldh1l1-negative cells expressing the partner gene.
    for gene in partner_genes:
        adata_proc.obs['gfap_' + gene.lower()] = _marker_flag(['Gfap', gene], absent=['Aldh1l1'])

    # Aldh1l1-positive, Gfap-negative cells expressing the partner gene.
    for gene in partner_genes:
        adata_proc.obs['aldh_' + gene.lower()] = _marker_flag(['Aldh1l1', gene], absent=['Gfap'])

    # Cells expressing Gfap, Aldh1l1 and the partner gene.
    for gene in partner_genes:
        adata_proc.obs['both_' + gene.lower()] = _marker_flag(['Aldh1l1', gene, 'Gfap'])

    # Relative path: the file lands in the current working directory.
    adata_proc.obs.to_csv("adata_proc_upset.obs.csv")

### Melanie

In [42]:
# Notebook-wide switch: cells guarded by `if bool_plot == True` only run when this is True.
bool_plot=True

In [41]:
# Mean z-score of selected genes per diet: first within astrocytes only, then across all cells.
if bool_plot:
    # Explicit copy: writing to .obs of a view would otherwise copy implicitly.
    adata_mel = adata_proc[(adata_proc.obs['celltypes']=='Astrocytes'), :].copy()  # Astrocytes Neurons
    adata_mel.obs['diet_celltype'] = adata_mel.obs['celltypes'].astype(str) + '_' + adata_mel.obs['diet'].astype(str)

    #melanie = ['Opa1','Dnm1l','Mfn1','Ngb','Apoe','Cpt1a','Lpl','Pdk4','Acadm','Acsl6','Acacb','Mlycd','Gja1']
    tim = ['Nnat','Peg3','Plagl1']

    # Scaling is done on the astrocyte subset, so z-scores are relative to astrocytes only.
    adata_mel.layers['scaled'] = sc.pp.scale(adata_mel, copy=True).X

    sc.pl.matrixplot(adata_mel, tim, 'diet_celltype', dendrogram=False, swap_axes=True,
                     colorbar_title='mean z-score', layer='scaled', cmap='RdBu_r', vmin=-0.1, vmax=0.1) #, vmin=-0.2, vmax=0.2

    #sc.pl.stacked_violin(adata_mel, melanie, 'diet_celltype', dendrogram=False, swap_axes=True,
    #                     colorbar_title='mean z-score', layer='scaled',  cmap='RdBu_r')

    # NOTE: adata_all is an alias, not a copy, so the 'scaled' layer is added to adata_proc itself.
    adata_all=adata_proc
    adata_all.layers['scaled'] = sc.pp.scale(adata_all, copy=True).X
    sc.pl.matrixplot(adata_all, tim, 'diet', dendrogram=False, swap_axes=True,
                     colorbar_title='mean z-score', layer='scaled', cmap='RdBu_r', vmin=-0.02, vmax=0.02) #, vmin=-0.2, vmax=0.2

In [31]:
#####################################################################################################################
# Per cell type: t-test of each HFD time point (5 d, 15 d) against chow on the highly variable
# genes, count of significant genes (adjusted p < 0.05), and a line chart of those counts.
if bool_plot:
    adata_proc_hv = adata_proc[:, adata_proc.var.highly_variable]
    adata_proc_hv.raw = adata_qc[:, adata_proc.var.highly_variable]
    adata_proc_hv.obs['diet_leiden'] = adata_proc_hv.obs['diet'].str.cat(adata_proc_hv.obs['leiden'], sep='_')
    adata_proc_hv.obs['diet_celltypes'] = adata_proc_hv.obs['diet'].str.cat(adata_proc_hv.obs['celltypes'], sep='_')

    # cell type -> (suffix of the .uns result key, line colour in the chart)
    celltype_style = {
        'Astrocytes': ('ast', '#1f77b4'),
        'Endothelial cells': ('end', '#aa40fc'),
        'Ependymal cells': ('epe', '#279e68'),
        'Microglia': ('mic', '#aec7e8'),
        'Mural cells': ('mur', '#98df8a'),
        'Neurons': ('neu', '#d62728'),
        'Oligodendrocytes': ('oli', '#ffbb78'),
        'Tanycytes': ('tan', '#ff7f0e'),
        'VLMCs': ('vlmc', '#ff9896'),
    }

    # One contrast per (time point, cell type); results are stored under
    # adata_proc_hv.uns['ct<days>_<suffix>'], e.g. 'ct5_ast' or 'ct15_vlmc'.
    for days in ('5', '15'):
        for celltype, (suffix, _) in celltype_style.items():
            sc.tl.rank_genes_groups(adata_proc_hv, 'diet_celltypes',
                                    groups=['hfd_' + days + '_' + celltype],
                                    reference='chow_' + celltype,
                                    key_added='ct' + days + '_' + suffix,
                                    method='t-test')  # wilcoxon

    # Count significant genes per contrast (total / up / down) and keep the underlying tables.
    deg_summary = {}
    for days in ('5', '15'):
        n_sig, n_up, n_down, tables = [], [], [], []
        for suffix, _ in celltype_style.values():
            result = adata_proc_hv.uns['ct' + days + '_' + suffix]
            groups = result['names'].dtype.names
            # Column 0: adjusted p-values, column 1: log fold changes.
            table = pd.DataFrame({group + '_' + key[:1]: result[key][group]
                                  for group in groups for key in ['pvals_adj', 'logfoldchanges']})
            significant = table.iloc[:, 0] < 0.05
            up = int((significant & (table.iloc[:, 1] > 0)).sum())
            down = int((significant & (table.iloc[:, 1] <= 0)).sum())
            if days == '5':
                # As before, only the 5-day contrasts are echoed (down counts as negatives).
                print(up)
                print(-down)
            n_sig.append(int(significant.sum()))
            n_up.append(up)
            n_down.append(down)
            tables.append(table)
        deg_summary[days] = (n_sig, n_up, n_down, tables)

    ct0_5, ct0_5up, ct0_5down, df0_5 = deg_summary['5']
    # The 15-day up/down lists used to stay empty; they are now filled like the 5-day ones.
    ct0_15, ct0_15up, ct0_15down, df0_15 = deg_summary['15']

    # Rows: cell types; columns: chow baseline (0 by definition), 5 d and 15 d DEG counts.
    d = {'0_5':ct0_5,'0_15':ct0_15}
    dfin = pd.DataFrame(d)
    dfin['0_0'] = [0] * len(celltype_style)
    dfin = dfin.reindex(['0_0','0_5','0_15'], axis=1)
    dfin.index = list(celltype_style)

    plt.rcParams["figure.figsize"] = (7,7)

    timepoints = ['SD', '5d HFHS diet', '15d HFHS diet']
    for celltype, (_, colour) in celltype_style.items():
        line_chart1 = plt.plot(timepoints, dfin.loc[celltype], linewidth=3, c=colour, label=celltype)

    #plt.xlabel("X axis label")
    plt.ylabel("Number of DEGs")
    # Without a legend the nine lines cannot be told apart.
    plt.legend(frameon=False)

In [32]:
# Collect gene names, adjusted p-values and log fold changes for the astrocyte, neuron and
# microglia contrasts at both HFD time points.
if bool_plot == True:
    de_tables = {}
    for uns_key in ('ct5_ast', 'ct5_neu', 'ct5_mic', 'ct15_ast', 'ct15_neu', 'ct15_mic'):
        result = adata_proc_hv.uns[uns_key]
        groups = result['names'].dtype.names
        # One column per (group, statistic); the suffix is the first letter of the statistic key.
        columns = {}
        for group in groups:
            for key in ['names', 'pvals_adj', 'logfoldchanges']:
                columns[group + '_' + key[:1]] = result[key][group]
        de_tables[uns_key] = pd.DataFrame(columns)

    dfa5 = de_tables['ct5_ast']
    dfn5 = de_tables['ct5_neu']
    dfm5 = de_tables['ct5_mic']
    dfa15 = de_tables['ct15_ast']
    dfn15 = de_tables['ct15_neu']
    dfm15 = de_tables['ct15_mic']

    # CSV export of these tables is currently switched off:
    #dfa5.to_csv("DGE0_5astro.csv")
    #dfn5.to_csv("DGE0_5neuro.csv")
    #dfa15.to_csv("DGE0_15astro.csv")
    #dfn15.to_csv("DGE0_15neuro.csv")

In [33]:
# Adjusted p-values of the genes in `melanie` for the astrocyte and neuron contrasts.
# Expressions inside an `if` block are not echoed by Jupyter, so each table is passed to
# display() (available as a builtin inside IPython/Jupyter kernels).
# NOTE(review): the only `melanie` definition visible nearby is commented out — confirm it is
# defined by an earlier cell before this one runs.
if bool_plot:
    display(dfa5.loc[dfa5['hfd_5_Astrocytes_n'].isin(melanie), ['hfd_5_Astrocytes_n','hfd_5_Astrocytes_p']])
    display(dfa15.loc[dfa15['hfd_15_Astrocytes_n'].isin(melanie), ['hfd_15_Astrocytes_n','hfd_15_Astrocytes_p']])
    display(dfn5.loc[dfn5['hfd_5_Neurons_n'].isin(melanie), ['hfd_5_Neurons_n','hfd_5_Neurons_p']])
    display(dfn15.loc[dfn15['hfd_15_Neurons_n'].isin(melanie), ['hfd_15_Neurons_n','hfd_15_Neurons_p']])

In [34]:
# Adjusted p-values of reactive-astrocyte markers (astrocyte contrasts) and activated-microglia
# markers (microglia contrasts). Expressions inside an `if` block are not echoed by Jupyter, so
# each table is passed to display() (a builtin inside IPython/Jupyter kernels).
# NOTE(review): marker_pan_a1_a2 and marker_genes_mglia are assigned in cells that appear further
# down in this notebook; on a fresh top-to-bottom run they may not exist yet — confirm cell order.
if bool_plot:
    # Flatten the three marker groups into one array of gene names.
    reactive_genes = np.concatenate((marker_pan_a1_a2['PAN_reactive'],
                                     marker_pan_a1_a2['A1_specific'],
                                     marker_pan_a1_a2['A2_specific']),
                                    axis=None)
    activated_genes = marker_genes_mglia['activated microglia']

    display(dfa5.loc[dfa5['hfd_5_Astrocytes_n'].isin(reactive_genes),
                     ['hfd_5_Astrocytes_n','hfd_5_Astrocytes_p']])
    display(dfa15.loc[dfa15['hfd_15_Astrocytes_n'].isin(reactive_genes),
                      ['hfd_15_Astrocytes_n','hfd_15_Astrocytes_p']])

    display(dfm5.loc[dfm5['hfd_5_Microglia_n'].isin(activated_genes),
                     ['hfd_5_Microglia_n','hfd_5_Microglia_p']])
    display(dfm15.loc[dfm15['hfd_15_Microglia_n'].isin(activated_genes),
                      ['hfd_15_Microglia_n','hfd_15_Microglia_p']])

### Microglia inflammation markers

In [35]:
#####################################################################################################################
if bool_plot == True:
    # Keep microglia only (astrocytes were considered as well but are not included).
    microglia_mask = adata_proc.obs['celltypes'] == 'Microglia'
    adata_inf = adata_proc[microglia_mask, :]

    # Grouping key "<celltype>_<diet>" for the per-diet plots.
    celltype_text = adata_inf.obs['celltypes'].astype(str)
    diet_text = adata_inf.obs['diet'].astype(str)
    adata_inf.obs['diet_celltype'] = celltype_text + '_' + diet_text

    # Marker panel; the broader inflammation panel is kept below for reference but disabled:
    #   'inflam_mark': ['Il1b','Il2','Il6','Tnf','Il1r1','Il6ra','Tlr4','Lcn2','Mmp2','Ccl5','Ccl12',
    #                   'Vegfa','Tpo','Plaur','Axin2']  # ,'Il5','Mmp13'
    activated_microglia_genes = ['Itgam','Cd14','Fcgr3','Fcgr2b','Cd40','Ptprc','Cd68','Cd80',
                                 'Cd86','Cx3cr1','Adgre1','Fcer1g']
    marker_genes_mglia = {'activated microglia': activated_microglia_genes}


In [36]:
#####################################################################################################################
# Stacked violin plot of the activated-microglia markers per "<celltype>_<diet>" group,
# drawn from a scaled copy of the microglia subset.
if bool_plot == True:
    # sc.pp.scale(copy=True) leaves adata_inf.X untouched; only the scaled matrix is stored as a layer.
    adata_inf.layers['scaled'] = sc.pp.scale(adata_inf, copy=True).X

    # Alternative matrix-plot view, currently disabled:
    #sc.pl.matrixplot(adata_inf, marker_genes_mglia, 'diet_celltype', dendrogram=False, swap_axes=True,
    #                 colorbar_title='mean z-score', layer='scaled', vmin=-0.2, vmax=0.2, cmap='RdBu_r')
    # NOTE(review): colorbar_title reads 'mean z-score' — confirm this label matches what the
    # stacked violin colour scale actually encodes.
    sc.pl.stacked_violin(adata_inf, marker_genes_mglia, 'diet_celltype', dendrogram=False, swap_axes=True,
                     colorbar_title='mean z-score', layer='scaled', vmin=-2, vmax=2, cmap='RdBu_r')

In [38]:
#####################################################################################################################
# Reactive-astrocyte marker panels (pan-reactive, A1-specific, A2-specific) per diet:
# mean z-score matrix plot, per-cell z-score heatmap, and a matrix plot of the unscaled values.
if bool_plot:
    # NOTE: adata_a is an alias, not a copy, so the 'scaled' layer is added to adata_astro itself.
    adata_a = adata_astro
    adata_a.layers['scaled'] = sc.pp.scale(adata_a, copy=True).X
    # NOTE(review): 'Hsbp1' may be a transposition of the heat-shock gene 'Hspb1' — confirm
    # against the source of this marker list before changing it.
    marker_pan_a1_a2 = {'PAN_reactive': ['Lcn2','Steap4','S1pr3','Timp1','Hsbp1','Cxcl10','Cd44','Osmr','Cp','Serpina3n',
                                         'Aspg','Vim','Gfap'],
                        'A1_specific': ['Serping1','H2-D1','Ggta1','Iigp1','Gbp2','Fbln5','Fkbp5','Psmb8',
                                        'Srgn','Amigo2'], #'C3H2-T23',
                        'A2_specific': ['Clcf1','Tgm1','Ptx3','S100a10','Sphk1','Cd109','Ptgs2','Emp1','Slc10a6','Tm4sf1',
                                        'B3gnt5','Cd14','Stat3']}
    sc.pl.matrixplot(adata_a, marker_pan_a1_a2, 'diet', dendrogram=False, swap_axes=True,
                     colorbar_title='mean z-score', layer='scaled', vmin=-0.2, vmax=0.2, cmap='RdBu_r')

    # use_raw must be False here: the values come from the 'scaled' layer, and asking for .raw
    # and a layer at the same time is contradictory (rejected by recent scanpy versions).
    sc.pl.heatmap(adata_a, marker_pan_a1_a2, swap_axes=True, show=False, use_raw=False,
                  show_gene_labels=True, groupby='diet', dendrogram=False, layer='scaled', vmin=-2, vmax=2, cmap='RdBu_r')

    # Same panel on the unscaled expression values in adata_a.X.
    sc.pl.matrixplot(adata=adata_a, var_names=marker_pan_a1_a2, groupby='diet', use_raw=False, log=False,
                     dendrogram=False, var_group_rotation=90, swap_axes=True, show=True)

## Co-expression

In [39]:
# Per-astrocyte 0/1 flags for co-expression of Gfap and/or Aldh1l1 with a set of partner genes,
# computed on the raw matrix and exported together with the diet label for UpSet plots.
if bool_recomp:
    # NOTE(review): column labels come from adata_astro.var while the values come from
    # adata_astro.raw — this assumes both have the same genes in the same order; confirm.
    df = pd.DataFrame(adata_astro.raw.X.toarray(), columns = adata_astro.var.index)
    df.index = adata_astro.obs.index
    df = df.assign(diet = adata_astro.obs['diet'])

    has_gfap, no_gfap = df['Gfap'] > 0, df['Gfap'] == 0
    has_aldh, no_aldh = df['Aldh1l1'] > 0, df['Aldh1l1'] == 0

    partner_genes = ['Lepr', 'Insr', 'Igf1r', 'Glp1r', 'Lpl', 'Slc2a1', 'Ucp2', 'Ghsr', 'Slc2a2']
    backgrounds = [
        ('gfap', has_gfap & no_aldh),   # Gfap detected, Aldh1l1 not detected
        ('aldh', has_aldh & no_gfap),   # Aldh1l1 detected, Gfap not detected
        ('both', has_aldh & has_gfap),  # both detected
    ]

    # Build all flags in one small frame instead of copying the full expression
    # frame once per flag via repeated df.assign().
    flags = pd.DataFrame(
        {prefix + '_' + gene.lower(): (background & (df[gene] > 0)) * 1
         for prefix, background in backgrounds
         for gene in partner_genes},
        index=df.index,
    )
    df = pd.concat([df, flags], axis=1)

    # Export by column name rather than by hard-coded position (was iloc[:, 31253:31281]).
    # NOTE(review): the old 28-column slice is assumed to be 'diet' plus the 27 flags — confirm.
    df[['diet'] + list(flags.columns)].to_csv("adata_astro_upset.obs.csv")

# RNA Velocity and clustering

In [40]:
# Gene list for the RNA-velocity section.
# NOTE(review): `spmark` is not referenced by the cells visible in this part of the notebook —
# confirm where (or whether) it is still used.
spmark=['Aldoc','Aqp4','S1pr1','Slc38a1',
        'Npy','Pcsk1n','Ins2','Il1b','Ifit3b','Flt1','Ucp2','Vamp5','S100b','Gfap','Aldh1l1',
        'Apoe','Ift43','Tpt1','Slc1a2','Slc3a2']

#### chow

In [41]:
# Chow astrocytes: Leiden clustering at resolution 0.5, letter names for the clusters, cached to disk.
if bool_recomp == True:
    chow_cells = adata_astro.obs['diet'] == 'chow'
    astro0 = adata_astro[chow_cells, ]
    sc.tl.leiden(astro0, key_added='cluster0', resolution=0.5)
    # Leiden id -> display letter; the mapping is fixed by hand for this clustering.
    cluster0_letters = {'0': 'b', '1': 'd', '2': 'a', '3': 'c'}
    astro0.obs['cluster0n'] = astro0.obs['cluster0'].replace(cluster0_letters)
    sc.write(sc_settings_writedir + 'astro0.h5ad', astro0)
else:
    # Reuse the cached result of an earlier run.
    astro0 = sc.read(sc_settings_writedir + 'astro0.h5ad')

In [42]:
# RNA velocity for chow astrocytes: merge the clustered cells with the loom file read below,
# run the scVelo steps in order, and cache the result.
# NOTE(review): `dir_scv` is not among the paths defined in the notebook's settings cell shown
# at the top — confirm it is defined before this cell runs.
if bool_recomp == True:  
    adata_loom0 = scv.read(dir_scv+'MUC26030/possorted_genome_bam_Z0I20.loom', cache=True)
    astro0_v = scv.utils.merge(astro0, adata_loom0)
    scv.pl.proportions(astro0_v)
    # Each step below works on astro0_v in place and builds on the previous one.
    scv.pp.filter_and_normalize(astro0_v, min_shared_counts=20, n_top_genes=2000)
    scv.pp.moments(astro0_v, n_pcs=30, n_neighbors=30)
    scv.tl.recover_dynamics(astro0_v)
    scv.tl.velocity(astro0_v)
    scv.tl.velocity_graph(astro0_v)
    
    sc.write(sc_settings_writedir+'astro0_v.h5ad', astro0_v)
else:
    # Reuse the cached result; note that adata_loom0 is NOT defined on this path.
    astro0_v = sc.read(sc_settings_writedir+'astro0_v.h5ad') 

In [43]:
# Chow astrocytes: UMAP coloured by cluster letter, then the velocity stream on the same embedding.
if bool_plot == True:
    # One colour per cluster letter; the same palette is used for both figures.
    # Earlier palette candidates: 'dimgray','gray','darkgray','lightgray' /
    # 'darkslategray','steelblue','lightsteelblue','bisque'
    chow_palette = ('firebrick', 'mediumblue', 'dodgerblue', 'aqua')

    cf.plot_umap_marker(astro0, ['cluster0n'],
                        palette=list(chow_palette),
                        use_raw=False,
                        size=60,
                        frameon=False)

    scv.set_figure_params()
    scv.pl.velocity_embedding_stream(astro0_v,
                                     basis='umap',
                                     color='cluster0n',
                                     use_raw=False,
                                     size=60,
                                     figsize=(6.5, 6.5),
                                     legend_loc='right margin',
                                     palette=list(chow_palette))

In [44]:
# Pie chart of chow astrocyte cluster sizes.
if bool_plot:
    # Colour per cluster letter (same letter -> colour pairs as the hard-coded lists used before).
    cluster_colors = {'a': 'firebrick', 'b': 'mediumblue', 'c': 'dodgerblue', 'd': 'aqua'}

    # value_counts() orders clusters by size, so labels and colours are taken from its index
    # instead of a hard-coded order that silently assumes a particular size ranking.
    sizes = astro0.obs['cluster0n'].value_counts()
    labels = tuple(sizes.index)

    fig1, ax1 = plt.subplots()
    # labeldistance=None: labels are not drawn on the wedges; only the percentages are shown.
    ax1.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90, pctdistance=1.3, labeldistance=None,
            colors=[cluster_colors[name] for name in labels],
            textprops={'fontsize': 20, 'color':"black"})
    ax1.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
    plt.show()

Rank genes by likelihoods per cluster/regime.
This ranks genes by their likelihood obtained from the dynamical model grouped by clusters specified in groupby.

In [45]:
# Rank genes by dynamical-model likelihood per chow cluster and save the ranking.
if bool_recomp:
    # Genes ordered by overall fit likelihood (kept for interactive use; replaced by later cells).
    top_genes = astro0_v.var['fit_likelihood'].sort_values(ascending=False).index

    scv.tl.rank_dynamical_genes(astro0_v, groupby='cluster0')
    # One column per cluster id, rows ordered by rank.
    df = scv.get_df(astro0_v, 'rank_dynamical_genes/names')
    df.to_csv('chow_potential_drivers.csv')
    print(df.shape)
    # An expression inside an `if` block is not echoed by Jupyter, so display it explicitly
    # (display() is a builtin inside IPython/Jupyter kernels).
    display(df.head(10))

In [46]:
# Heatmap of the top dynamical genes of chow cluster '1', cells ordered by latent time.
if bool_plot:
    scv.tl.latent_time(astro0_v)
    # Read the ranking written by the ranking cell instead of relying on the global `df`,
    # which is reused by several unrelated cells and only holds this ranking when
    # bool_recomp was True in the current session.
    chow_drivers = pd.read_csv('chow_potential_drivers.csv', index_col=0)
    top_genes = chow_drivers['1']   #['Aldoc','Aqp4','S1pr1','Slc38a1']
    scv.pl.heatmap(astro0_v, var_names=top_genes, sortby='latent_time', col_color='cluster0n', n_convolve=100,
                   figsize=(10,15), font_scale=1, colorbar=True, yticklabels=True)

In [47]:
# Velocity plot for Pcsk1n in chow astrocytes, coloured by cluster letter.
if bool_plot == True:
    # Other genes that were inspected are listed in the trailing comment.
    scv.pl.velocity(astro0_v, var_names=['Pcsk1n'], figsize=(10,11),  #'Aldoc','Pcsk1n'
                    colorbar=True, ncols=2, color='cluster0n')
   # Disabled variant based on a separately cached object ('astro0_v_aldh.h5ad'):
   # astro0_v_aldh = sc.read(sc_settings_writedir+'astro0_v_aldh.h5ad')
   # scv.pl.velocity(astro0_v_aldh, var_names=['Gfap','Aldoc','Pcsk1n','Aqp4','S1pr1','Pcdh15','Pak3','Aldh1l1'], colorbar=True, ncols=1, color='cluster0n')

#### hfd_5

In [48]:
# 5-day HFD astrocytes: Leiden clustering at resolution 0.5, letter names for the clusters, cached to disk.
if bool_recomp == True:
    hfd5_cells = adata_astro.obs['diet'] == 'hfd_5'
    astro5 = adata_astro[hfd5_cells, ]
    sc.tl.leiden(astro5, key_added='cluster5', resolution=0.5)
    # Leiden id -> display letter; the mapping is fixed by hand for this clustering.
    cluster5_letters = {'0': 'c', '1': 'a', '2': 'b'}
    astro5.obs['cluster5n'] = astro5.obs['cluster5'].replace(cluster5_letters)
    sc.write(sc_settings_writedir + 'astro5.h5ad', astro5)
else:
    # Reuse the cached result of an earlier run.
    astro5 = sc.read(sc_settings_writedir + 'astro5.h5ad')

In [49]:
# RNA velocity for 5-day HFD astrocytes: merge the clustered cells with the loom file read below,
# run the scVelo steps in order, and cache the result.
# NOTE(review): `dir_scv` is not among the paths defined in the notebook's settings cell shown
# at the top — confirm it is defined before this cell runs.
if bool_recomp == True: 
    adata_loom5 = scv.read(dir_scv+'MUC26031/possorted_genome_bam_VXMFJ.loom', cache=True)
    astro5_v = scv.utils.merge(astro5, adata_loom5)
    scv.pl.proportions(astro5_v)
    # Each step below works on astro5_v in place and builds on the previous one.
    scv.pp.filter_and_normalize(astro5_v, min_shared_counts=20, n_top_genes=2000)
    scv.pp.moments(astro5_v, n_pcs=30, n_neighbors=30)
    scv.tl.recover_dynamics(astro5_v)
    scv.tl.velocity(astro5_v)
    scv.tl.velocity_graph(astro5_v)

    sc.write(sc_settings_writedir+'astro5_v.h5ad', astro5_v)
else:
    # Reuse the cached result; note that adata_loom5 is NOT defined on this path.
    astro5_v = sc.read(sc_settings_writedir+'astro5_v.h5ad') 

In [50]:
# 5-day HFD astrocytes: UMAP coloured by cluster letter, then the velocity stream on the same embedding.
if bool_plot == True:
    # One colour per cluster letter; the same palette is used for both figures.
    hfd5_palette = ('darkorange', 'orangered', 'darkcyan')

    cf.plot_umap_marker(astro5, ['cluster5n'],
                        use_raw=False,
                        size=60,
                        frameon=False,
                        palette=list(hfd5_palette))

    scv.set_figure_params()
    scv.pl.velocity_embedding_stream(astro5_v,
                                     basis='umap',
                                     color='cluster5n',
                                     use_raw=False,
                                     size=60,
                                     figsize=(6.5, 6.5),
                                     legend_loc='right margin',
                                     palette=list(hfd5_palette))

In [51]:
# Pie chart of 5-day HFD astrocyte cluster sizes.
if bool_plot:
    # Colour per cluster letter (same letter -> colour pairs as the hard-coded lists used before).
    cluster_colors = {'a': 'darkorange', 'b': 'orangered', 'c': 'darkcyan'}

    # value_counts() orders clusters by size, so labels and colours are taken from its index
    # instead of a hard-coded order that silently assumes a particular size ranking.
    sizes = astro5.obs['cluster5n'].value_counts()
    labels = tuple(sizes.index)

    fig1, ax1 = plt.subplots()
    # labeldistance=None: labels are not drawn on the wedges; only the percentages are shown.
    ax1.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90, pctdistance=1.3, labeldistance=None,
            colors=[cluster_colors[name] for name in labels],
            textprops={'fontsize': 20, 'color':"black"})
    ax1.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
    plt.show()

In [52]:
# Rank genes by dynamical-model likelihood per 5-day HFD cluster and save the ranking.
if bool_recomp:
    # Genes ordered by overall fit likelihood (kept for interactive use; replaced by later cells).
    top_genes = astro5_v.var['fit_likelihood'].sort_values(ascending=False).index

    # Group by the diet-specific clustering ('cluster5'), as the chow and hfd_15 cells do with
    # 'cluster0' / 'cluster15'. The previous groupby='leiden' ranked genes per cluster of a
    # different clustering, so column '1' did not correspond to the cluster5-based plots
    # that consume this ranking.
    scv.tl.rank_dynamical_genes(astro5_v, groupby='cluster5', n_genes=400)
    # One column per cluster id, rows ordered by rank.
    df = scv.get_df(astro5_v, 'rank_dynamical_genes/names')
    df.to_csv('hfd5_potential_drivers.csv')
    print(df.shape)
    # An expression inside an `if` block is not echoed by Jupyter, so display it explicitly
    # (display() is a builtin inside IPython/Jupyter kernels).
    display(df.head(10))

In [53]:
# Heatmap of the top dynamical genes of 5-day HFD cluster '1', cells ordered by latent time.
if bool_plot:
    scv.tl.latent_time(astro5_v)
    # Read the ranking written by the ranking cell instead of relying on the global `df`,
    # which is reused by several unrelated cells and only holds this ranking when
    # bool_recomp was True in the current session.
    hfd5_drivers = pd.read_csv('hfd5_potential_drivers.csv', index_col=0)
    top_genes = hfd5_drivers['1'] #['Npy','Pcsk1n','Ins2','Il1b','Ifit3b','Flt1','Ucp2','Vamp5','S100b','Gfap','Aldh1l1']
    scv.pl.heatmap(astro5_v, var_names=top_genes, sortby='latent_time', col_color='cluster5n', n_convolve=100,
                  col_cluster=False, figsize=(10,15), font_scale=1, yticklabels=True)

In [54]:
# Velocity plot for Ucp2 in 5-day HFD astrocytes, coloured by cluster letter.
if bool_plot == True:
    # Other genes that were inspected are listed in the trailing comment.
    scv.pl.velocity(astro5_v, var_names=['Ucp2'], figsize=(10,12), #'Gfap', 'Clu','Ucp2','Vamp5'
                    colorbar=True, ncols=1, color='cluster5n')
    # Disabled variant based on a separately cached object ('astro5_v_aldh.h5ad'):
    #astro5_v_aldh = sc.read(sc_settings_writedir+'astro5_v_aldh.h5ad')
    #scv.pl.velocity(astro5_v_aldh, var_names=['Aldh1l1'], colorbar=True, ncols=1, color='cluster5n')

#### hfd_15

In [55]:
# 15-day HFD astrocytes: Leiden clustering at resolution 0.5, letter names for the clusters, cached to disk.
if bool_recomp == True:
    hfd15_cells = adata_astro.obs['diet'] == 'hfd_15'
    astro15 = adata_astro[hfd15_cells, ]
    sc.tl.leiden(astro15, key_added='cluster15', resolution=0.5)
    # Leiden id -> display letter; the mapping is fixed by hand for this clustering.
    cluster15_letters = {'0': 'c', '1': 'b', '2': 'a', '3': 'd'}
    astro15.obs['cluster15n'] = astro15.obs['cluster15'].replace(cluster15_letters)
    sc.write(sc_settings_writedir + 'astro15.h5ad', astro15)
else:
    # Reuse the cached result of an earlier run.
    astro15 = sc.read(sc_settings_writedir + 'astro15.h5ad')

In [56]:
# RNA velocity for 15-day HFD astrocytes: merge the clustered cells with the loom file read below,
# run the scVelo steps in order, and cache the result.
# NOTE(review): `dir_scv` is not among the paths defined in the notebook's settings cell shown
# at the top — confirm it is defined before this cell runs.
if bool_recomp == True: 
    adata_loom15 = scv.read(dir_scv+'MUC26032/possorted_genome_bam_2UK19.loom', cache=True)
    astro15_v = scv.utils.merge(astro15, adata_loom15)
    scv.pl.proportions(astro15_v)
    # Each step below works on astro15_v in place and builds on the previous one.
    scv.pp.filter_and_normalize(astro15_v, min_shared_counts=20, n_top_genes=2000)
    scv.pp.moments(astro15_v, n_pcs=30, n_neighbors=30)
    scv.tl.recover_dynamics(astro15_v)
    scv.tl.velocity(astro15_v)
    scv.tl.velocity_graph(astro15_v)
    
    sc.write(sc_settings_writedir+'astro15_v.h5ad', astro15_v)
else:
    # Reuse the cached result; note that adata_loom15 is NOT defined on this path.
    astro15_v = sc.read(sc_settings_writedir+'astro15_v.h5ad') 

In [57]:
# 15-day HFD astrocytes: UMAP coloured by cluster letter, then the velocity stream on the same embedding.
if bool_plot == True:
    cf.plot_umap_marker(astro15, ['cluster15n'], use_raw=False, size=60, frameon=False, 
                        palette=['darkorange','orangered','darkcyan','lightseagreen'])
    # NOTE(review): unlike the chow and hfd_5 stream plots, this call passes use_raw=True, sets no
    # figsize, is not preceded by scv.set_figure_params(), and saves to 'check.png' — confirm
    # whether these differences are intentional.
    scv.pl.velocity_embedding_stream(astro15_v, basis='umap', color='cluster15n', use_raw=True,save='check.png',
                                     size=60, legend_loc='right margin',
                                     palette=['darkorange','orangered','darkcyan','lightseagreen'])

In [58]:
# Pie chart of 15-day HFD astrocyte cluster sizes.
if bool_plot:
    # Colour per cluster letter (same letter -> colour pairs as the hard-coded lists used before).
    cluster_colors = {'a': 'darkorange', 'b': 'orangered', 'c': 'darkcyan', 'd': 'lightseagreen'}

    # value_counts() orders clusters by size, so labels and colours are taken from its index
    # instead of a hard-coded order that silently assumes a particular size ranking.
    sizes = astro15.obs['cluster15n'].value_counts()
    labels = tuple(sizes.index)

    fig1, ax1 = plt.subplots()
    # labeldistance=None: labels are not drawn on the wedges; only the percentages are shown.
    ax1.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90, pctdistance=1.3, labeldistance=None,
            colors=[cluster_colors[name] for name in labels],
            textprops={'fontsize': 20, 'color':"black"})
    ax1.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
    plt.show()

In [59]:
# Rank genes by dynamical-model likelihood per 15-day HFD cluster and save the ranking.
if bool_recomp:
    # Genes ordered by overall fit likelihood (kept for interactive use; replaced by later cells).
    top_genes = astro15_v.var['fit_likelihood'].sort_values(ascending=False).index

    scv.tl.rank_dynamical_genes(astro15_v, groupby='cluster15', n_genes=400)
    # One column per cluster id, rows ordered by rank.
    df = scv.get_df(astro15_v, 'rank_dynamical_genes/names')
    df.to_csv('hfd15_potential_drivers.csv')
    print(df.shape)
    # An expression inside an `if` block is not echoed by Jupyter, so display it explicitly
    # (display() is a builtin inside IPython/Jupyter kernels).
    display(df.head(10))

In [60]:
# Heatmap of the top dynamical genes of 15-day HFD cluster '1', cells ordered by latent time.
if bool_plot:
    scv.tl.latent_time(astro15_v)
    # Read the ranking written by the ranking cell instead of relying on the global `df`,
    # which is reused by several unrelated cells and only holds this ranking when
    # bool_recomp was True in the current session.
    hfd15_drivers = pd.read_csv('hfd15_potential_drivers.csv', index_col=0)
    top_genes = hfd15_drivers['1'] # ['Apoe','Ift43','Cldn10','Tpt1','Slc1a2','Slc3a2']
    scv.pl.heatmap(astro15_v, var_names=top_genes, sortby='latent_time', col_color='cluster15n', n_convolve=100,
                  col_cluster=False, figsize=(10,15), font_scale=1, yticklabels=True)

In [61]:
# Velocity plot for Apoe in 15-day HFD astrocytes, coloured by cluster letter.
if bool_plot == True:
    # Other genes that were inspected are listed in the trailing comment.
    scv.pl.velocity(astro15_v, var_names=['Apoe'], figsize=(10,12),  # 'Ift43','Apoe'
                    colorbar=True, ncols=1, color='cluster15n')
    # Disabled variant based on a separately cached object ('astro15_v_aldh.h5ad'):
    #astro15_v_aldh = sc.read(sc_settings_writedir+'astro15_v_aldh.h5ad')
    #scv.pl.velocity(astro15_v_aldh, var_names=['Aldh1l1'], colorbar=True, ncols=1, color='cluster15n')

In [62]:
# Bring the per-diet cluster ids back onto adata_astro and derive combined labels:
#   'cluster'        per-diet Leiden id of each cell
#   'diet_cluster'   "<diet>_<cluster id>"
#   'diet_location'  "<diet>_up" / "<diet>_down" according to the hand-curated mapping below
if bool_plot:
    per_diet_clusters = pd.concat([astro0.obs['cluster0'].astype(str),
                                   astro5.obs['cluster5'].astype(str),
                                   astro15.obs['cluster15'].astype(str)])

    # Align on the cell barcode. A positional assignment of the concatenated list is only
    # correct if the rows of adata_astro happen to be ordered chow, hfd_5, hfd_15.
    unassigned = adata_astro.obs_names.difference(per_diet_clusters.index)
    if len(unassigned) > 0:
        raise ValueError(str(len(unassigned)) + ' cells of adata_astro have no per-diet cluster; '
                         'astro0/astro5/astro15 do not match adata_astro')
    adata_astro.obs['cluster'] = per_diet_clusters.reindex(adata_astro.obs_names).values

    adata_astro.obs['diet_cluster'] = adata_astro.obs['diet'].astype(str) + '_' + adata_astro.obs['cluster'].astype(str)
    adata_astro.obs['diet_location'] = adata_astro.obs['diet_cluster'].replace({'chow_2': 'chow_up',
                                                                                'chow_0': 'chow_down',
                                                                                'chow_1': 'chow_down',
                                                                                'chow_3': 'chow_down',
                                                                                'hfd_5_0': 'hfd_5_down',
                                                                                'hfd_5_1': 'hfd_5_up',
                                                                                'hfd_5_2': 'hfd_5_up',
                                                                                'hfd_15_0': 'hfd_15_down',
                                                                                'hfd_15_1': 'hfd_15_up',
                                                                                'hfd_15_2': 'hfd_15_up',
                                                                                'hfd_15_3': 'hfd_15_down'})

#### All diets

In [63]:
# RNA velocity on the astrocytes of all three diets together; the result is cached to disk.
# Guarded by bool_recomp like the per-diet velocity cells: this branch needs adata_loom0/5/15,
# which are only loaded when bool_recomp is True, and the else-branch reads the cache this
# branch writes. With the previous bool_plot guard the cell failed whenever plotting was on
# but recomputation was off.
if bool_recomp:
    adata_loom_all = adata_loom0.concatenate([adata_loom5, adata_loom15])
    astro_all_v = scv.utils.merge(adata_astro, adata_loom_all)
    scv.pl.proportions(astro_all_v)
    # Each step below works on astro_all_v in place and builds on the previous one.
    scv.pp.filter_and_normalize(astro_all_v, min_shared_counts=20, n_top_genes=2000)
    scv.pp.moments(astro_all_v, n_pcs=30, n_neighbors=30)
    scv.tl.recover_dynamics(astro_all_v)
    scv.tl.velocity(astro_all_v)
    scv.tl.velocity_graph(astro_all_v)
    sc.write(sc_settings_writedir+'astro_all_v.h5ad', astro_all_v)
else:
    # Reuse the cached result of an earlier run.
    astro_all_v = sc.read(sc_settings_writedir+'astro_all_v.h5ad')

In [64]:
if bool_plot == True:
    # Same colouring key and point size for the plain UMAP and the velocity stream plot.
    cluster_key = 'leiden'
    point_size = 60

    cf.plot_umap_marker(adata_astro, [cluster_key], use_raw=False, size=point_size, frameon=False)
    scv.pl.velocity_embedding_stream(
        astro_all_v,
        basis='umap',
        color=cluster_key,
        use_raw=True,
        size=point_size,
        legend_loc='right margin',
    )

In [65]:
if bool_plot == True:
    # Genes ordered by decreasing 'fit_likelihood'.
    top_genes = astro_all_v.var['fit_likelihood'].sort_values(ascending=False).index

    # Rank dynamical genes per leiden cluster and export the table of gene names.
    scv.tl.rank_dynamical_genes(astro_all_v, groupby='leiden', n_genes=400)
    df = scv.get_df(astro_all_v, 'rank_dynamical_genes/names')
    df.to_csv('astro_all_potential_drivers.csv')
    print(df.shape)
    # A bare expression inside an `if` body is never rendered by Jupyter (only the last
    # top-level expression of a cell is), so the preview has to be displayed explicitly.
    display(df.head(10))

# Differential gene expression

### diet effect

In [66]:
if bool_plot == True:
    # Differential expression across all astrocytes: 'hfd_15' tested against the 'chow' reference.
    sc.tl.rank_genes_groups(adata_astro, 'diet', groups=['hfd_15'], reference='chow', method='t-test_overestim_var')

    # Flatten the structured result arrays into one table (one column per group and statistic).
    result = adata_astro.uns['rank_genes_groups']
    groups = result['names'].dtype.names
    df = pd.DataFrame({group + '_' + key[:1]: result[key][group]
                       for group in groups for key in ['names', 'logfoldchanges', 'pvals_adj']})

    # Only one group was requested, so the table has exactly these three columns.
    # NOTE: 'pval' holds the adjusted p-values ('pvals_adj').
    df.columns = ['gene', 'log2fc', 'pval']
    # The file is tab-separated despite its .csv extension.
    df.to_csv(path_or_buf = dir_tables+"chow_vs_hfd_15.csv", sep="\t")

    # A bare expression inside an `if` body is never rendered by Jupyter, so the astrocyte
    # marker rows have to be displayed explicitly.
    display(df[df['gene'].isin(marker_genes_dict['Astrocytes'])])

### diet effect of up and down clusters

In [67]:
if bool_plot == True:
    # Differential expression within HFD 15: 'hfd_15_down' tested against the 'hfd_15_up' reference.
    sc.tl.rank_genes_groups(adata_astro, 'diet_location', groups=['hfd_15_down'], reference='hfd_15_up',
                            method='t-test_overestim_var')

    # Flatten the structured result arrays into one table (one column per group and statistic).
    result = adata_astro.uns['rank_genes_groups']
    groups = result['names'].dtype.names
    df = pd.DataFrame({group + '_' + key[:1]: result[key][group]
                       for group in groups for key in ['names', 'logfoldchanges', 'pvals_adj']})

    # Only one group was requested, so the table has exactly these three columns.
    # NOTE: 'pval' holds the adjusted p-values ('pvals_adj').
    df.columns = ['gene', 'log2fc', 'pval']
    # The file is tab-separated despite its .csv extension.
    df.to_csv(path_or_buf = dir_tables+"hfd_15_up_vs_hfd_15_down.csv", sep="\t")

    # A bare expression inside an `if` body is never rendered by Jupyter, so the astrocyte
    # marker rows have to be displayed explicitly.
    display(df[df['gene'].isin(marker_genes_dict['Astrocytes'])])

### diet effect comparing clusters

In [68]:
if bool_plot == True:
    # Differential expression between diet/cluster groups: 'hfd_15_2' tested against the 'chow_2' reference.
    sc.tl.rank_genes_groups(adata_astro, 'diet_cluster', groups=['hfd_15_2'], reference='chow_2',
                            method='t-test_overestim_var')

    # Flatten the structured result arrays into one table (one column per group and statistic).
    result = adata_astro.uns['rank_genes_groups']
    groups = result['names'].dtype.names
    df = pd.DataFrame({group + '_' + key[:1]: result[key][group]
                       for group in groups for key in ['names', 'logfoldchanges', 'pvals_adj']})

    # Only one group was requested, so the table has exactly these three columns.
    # NOTE: 'pval' holds the adjusted p-values ('pvals_adj').
    df.columns = ['gene', 'log2fc', 'pval']
    # The file is tab-separated despite its .csv extension.
    df.to_csv(path_or_buf = dir_tables+"chow_2_vs_hfd_15_2.csv", sep="\t")

    # A bare expression inside an `if` body is never rendered by Jupyter, so the astrocyte
    # marker rows have to be displayed explicitly.
    display(df[df['gene'].isin(marker_genes_dict['Astrocytes'])])

## Astrocyte markers per diet

#### chow

In [69]:
if bool_plot == True:
    # One entry per saved figure: (genes to plot, file-name suffix, upper limit of the colour scale).
    # NOTE(review): the vmax values are hard-coded; presumably chosen to keep colour scales
    # comparable between figures — confirm.
    chow_panels = [
        (['Gfap', 'Aldh1l1'],
         "_Gfap_Aldh1l1_chow.png", 3.4056416),
        (['Slc1a2', 'Slc1a3', 'Aqp4', 'Gja1', 'Gjb6', 'Atp1b2'],
         "_astrocyte_markers_chow.png", 4.1836486),
    ]
    for panel_genes, file_suffix, color_max in chow_panels:
        cf.plot_umap_marker(astro0, panel_genes, color_map=mymap, size=30,
                            save=file_suffix, use_raw=False, vmax=color_max, frameon=False)

#### hfd 5

In [70]:
if bool_plot == True:
    # One entry per saved figure: (genes to plot, file-name suffix, upper limit of the colour scale).
    # NOTE(review): the vmax values are hard-coded; presumably chosen to keep colour scales
    # comparable between figures — confirm.
    hfd5_panels = [
        (['Gfap', 'Aldh1l1'],
         "_Gfap_Aldh1l1_hfd5.png", 3.4056416),
        (['Slc1a2', 'Slc1a3', 'Aqp4', 'Gja1', 'Gjb6', 'Atp1b2'],
         "_astrocyte_markers_hfd5.png", 4.1836486),
    ]
    for panel_genes, file_suffix, color_max in hfd5_panels:
        cf.plot_umap_marker(astro5, panel_genes, color_map=mymap, size=30,
                            save=file_suffix, use_raw=False, vmax=color_max, frameon=False)

#### hfd 15

In [71]:
if bool_plot == True:
    # One entry per saved figure: (genes to plot, file-name suffix, upper limit of the colour scale).
    # NOTE(review): the vmax values are hard-coded; presumably chosen to keep colour scales
    # comparable between figures — confirm.
    hfd15_panels = [
        (['Gfap', 'Aldh1l1'],
         "_Gfap_Aldh1l1_hfd15.png", 3.4056416),
        (['Slc1a2', 'Slc1a3', 'Aqp4', 'Gja1', 'Gjb6', 'Atp1b2'],
         "_astrocyte_markers_hfd15.png", 4.1836486),
    ]
    for panel_genes, file_suffix, color_max in hfd15_panels:
        cf.plot_umap_marker(astro15, panel_genes, color_map=mymap, size=30,
                            save=file_suffix, use_raw=False, vmax=color_max, frameon=False)

# Counting

In [79]:
if bool_plot == True:
    # Per gene: number of cells with a non-zero value in .raw.X, stored as var['n_cells'].
    for diet_adata in (astro0, astro5, astro15):
        diet_adata.var['n_cells'] = np.squeeze(np.asarray((diet_adata.raw.X > 0).sum(0)))

    # Restrict the counts to the astrocyte marker genes.
    astro_markers = marker_genes_dict['Astrocytes']
    n_chow = astro0.var.loc[astro0.var.index.isin(astro_markers), 'n_cells']
    n_hfd5 = astro5.var.loc[astro5.var.index.isin(astro_markers), 'n_cells']
    n_hfd15 = astro15.var.loc[astro15.var.index.isin(astro_markers), 'n_cells']

    # Percent change of the expressing-cell count between diets: (later / earlier - 1) * 100.
    # NOTE(review): these are raw cell counts, not fractions of each diet's total cell number —
    # confirm that the diets are comparable in size or that this is intended.
    p0_5 = (n_hfd5/n_chow-1)*100
    p5_15 = (n_hfd15/n_hfd5-1)*100
    p0_15 = (n_hfd15/n_chow-1)*100

    # Only the two consecutive comparisons are exported; p0_15 stays in memory.
    p0_5.to_csv('substraction_cells_hfd5-chow.csv')
    p5_15.to_csv('substraction_cells_hfd15-hfd5.csv')

In [80]:
if bool_plot == True:
    # Bar chart of p0_5 (already expressed in percent), one bar per index entry.
    fig = plt.figure(figsize=(9, 5))
    ax = fig.add_axes([0, 0, 1, 1])  # axes span the whole figure canvas
    ax.bar(p0_5.index, p0_5)
    ax.set_xlabel("astrocyte markers")
    ax.set_ylabel("percent of increase comparing to chow (%)")
    ax.set_title("Substraction HFD_5 - chow")
    plt.show()

In [81]:
if bool_plot == True:
    # Bar chart of p5_15 (already expressed in percent), one bar per index entry.
    fig = plt.figure(figsize=(9, 5))
    ax = fig.add_axes([0, 0, 1, 1])  # axes span the whole figure canvas
    ax.bar(p5_15.index, p5_15)
    ax.set_xlabel("astrocyte markers")
    ax.set_ylabel("percentage of increase comparing to HFD_5 (%)")
    ax.set_title("Substraction HFD_15 - HFD_5")
    plt.show()

### up cluster

In [68]:
if bool_plot == True:
    # Subset the "up" group of each diet. The explicit .copy() turns each subset into an
    # independent AnnData, so writing var['n_cells'] below does not go through a view.
    astro0up = adata_astro[adata_astro.obs['diet_location']=='chow_up',].copy()
    astro5up = adata_astro[adata_astro.obs['diet_location']=='hfd_5_up',].copy()
    astro15up = adata_astro[adata_astro.obs['diet_location']=='hfd_15_up',].copy()

    # Per gene: number of cells with a value > 0 in .X.
    # np.asarray(...).ravel() flattens the column sums to a 1-D array whether .X is a dense
    # array or a sparse matrix (whose .sum(0) is a 1 x n_genes matrix).
    # NOTE(review): this counts on .X, while the per-diet counting cell uses .raw.X — confirm
    # which matrix is intended.
    for up_subset in (astro0up, astro5up, astro15up):
        up_subset.var['n_cells'] = np.asarray((up_subset.X > 0).sum(0)).ravel()

    # Expressing-cell counts restricted to the astrocyte marker genes.
    astro_markers = marker_genes_dict['Astrocytes']
    n_chow_up = astro0up.var.loc[astro0up.var.index.isin(astro_markers), 'n_cells']
    n_hfd5_up = astro5up.var.loc[astro5up.var.index.isin(astro_markers), 'n_cells']
    n_hfd15_up = astro15up.var.loc[astro15up.var.index.isin(astro_markers), 'n_cells']

    # Ratios of counts between diets (1.0 means no change).
    p0_5up = n_hfd5_up/n_chow_up
    p0_15up = n_hfd15_up/n_chow_up
    p5_15up = n_hfd15_up/n_hfd5_up

In [69]:
if bool_plot == True:
    # p0_5up is a ratio; (ratio - 1) * 100 turns it into a percent change.
    pct_change = (p0_5up-1)*100

    fig = plt.figure(figsize=(9, 5))
    ax = fig.add_axes([0, 0, 1, 1])  # axes span the whole figure canvas
    ax.bar(pct_change.index, pct_change)
    ax.set_xlabel("astrocyte markers")
    ax.set_ylabel("percent of increase comparing to chow (%)")
    ax.set_title("Substraction HFD_5-Chow - up")
    plt.show()

In [70]:
if bool_plot == True:
    # p0_15up is a ratio; (ratio - 1) * 100 turns it into a percent change.
    pct_change = (p0_15up-1)*100

    fig = plt.figure(figsize=(9, 5))
    ax = fig.add_axes([0, 0, 1, 1])  # axes span the whole figure canvas
    ax.bar(pct_change.index, pct_change)
    ax.set_xlabel("astrocyte markers")
    ax.set_ylabel("percent of increase comparing to chow (%)")
    ax.set_title("Substraction HFD_15-Chow - up")
    plt.show()

In [71]:
if bool_plot == True:
    # p5_15up is a ratio; (ratio - 1) * 100 turns it into a percent change.
    pct_change = (p5_15up-1)*100

    fig = plt.figure(figsize=(9, 5))
    ax = fig.add_axes([0, 0, 1, 1])  # axes span the whole figure canvas
    ax.bar(pct_change.index, pct_change)
    ax.set_xlabel("astrocyte markers")
    ax.set_ylabel("percentage of increase comparing to HFD_5 (%)")
    ax.set_title("Substraction HFD_15-HFD_5 - up")
    plt.show()

### down clusters

In [72]:
if bool_plot == True:
    # Subset the "down" group of each diet. The explicit .copy() turns each subset into an
    # independent AnnData, so writing var['n_cells'] below does not go through a view.
    astro0down = adata_astro[adata_astro.obs['diet_location']=='chow_down',].copy()
    astro5down = adata_astro[adata_astro.obs['diet_location']=='hfd_5_down',].copy()
    astro15down = adata_astro[adata_astro.obs['diet_location']=='hfd_15_down',].copy()

    # Per gene: number of cells with a value > 0 in .X.
    # np.asarray(...).ravel() flattens the column sums to a 1-D array whether .X is a dense
    # array or a sparse matrix (whose .sum(0) is a 1 x n_genes matrix).
    # NOTE(review): this counts on .X, while the per-diet counting cell uses .raw.X — confirm
    # which matrix is intended.
    for down_subset in (astro0down, astro5down, astro15down):
        down_subset.var['n_cells'] = np.asarray((down_subset.X > 0).sum(0)).ravel()

    # Expressing-cell counts restricted to the astrocyte marker genes.
    astro_markers = marker_genes_dict['Astrocytes']
    n_chow_down = astro0down.var.loc[astro0down.var.index.isin(astro_markers), 'n_cells']
    n_hfd5_down = astro5down.var.loc[astro5down.var.index.isin(astro_markers), 'n_cells']
    n_hfd15_down = astro15down.var.loc[astro15down.var.index.isin(astro_markers), 'n_cells']

    # Ratios of counts between diets (1.0 means no change).
    p0_5down = n_hfd5_down/n_chow_down
    p0_15down = n_hfd15_down/n_chow_down
    p5_15down = n_hfd15_down/n_hfd5_down

In [73]:
if bool_plot == True:
    # p0_5down is a ratio; (ratio - 1) * 100 turns it into a percent change.
    pct_change = (p0_5down-1)*100

    fig = plt.figure(figsize=(9, 5))
    ax = fig.add_axes([0, 0, 1, 1])  # axes span the whole figure canvas
    ax.bar(pct_change.index, pct_change)
    ax.set_xlabel("astrocyte markers")
    ax.set_ylabel("percent of increase comparing to chow (%)")
    ax.set_title("Substraction HFD_5-Chow - down")
    plt.show()

In [74]:
if bool_plot == True:
    # p0_15down is a ratio; (ratio - 1) * 100 turns it into a percent change.
    pct_change = (p0_15down-1)*100

    fig = plt.figure(figsize=(9, 5))
    ax = fig.add_axes([0, 0, 1, 1])  # axes span the whole figure canvas
    ax.bar(pct_change.index, pct_change)
    ax.set_xlabel("astrocyte markers")
    ax.set_ylabel("percent of increase comparing to chow (%)")
    ax.set_title("Substraction HFD_15-Chow - down")
    plt.show()

In [75]:
if bool_plot == True:
    # p5_15down is a ratio; (ratio - 1) * 100 turns it into a percent change.
    pct_change = (p5_15down-1)*100

    fig = plt.figure(figsize=(9, 5))
    ax = fig.add_axes([0, 0, 1, 1])  # axes span the whole figure canvas
    ax.bar(pct_change.index, pct_change)
    ax.set_xlabel("astrocyte markers")
    ax.set_ylabel("percentage of increase comparing to HFD_5 (%)")
    # Title fixed: the group is "HFD_15" (the previous label read "HFD_15b"), matching the
    # corresponding "up" plot.
    ax.set_title("Substraction HFD_15-HFD_5 - down")
    plt.show()

### up vs down clusters

In [76]:
if bool_plot == True:
    # Ratio of expressing-cell counts ("down" / "up") per astrocyte marker, within one diet.
    astro_markers = marker_genes_dict['Astrocytes']

    # chow
    p0down_0up = (astro0down.var.loc[astro0down.var.index.isin(astro_markers), 'n_cells']
                  / astro0up.var.loc[astro0up.var.index.isin(astro_markers), 'n_cells'])

    # hfd 5 — each table is masked with its own index (the numerator was previously masked
    # with astro0down's index).
    p5down_5up = (astro5down.var.loc[astro5down.var.index.isin(astro_markers), 'n_cells']
                  / astro5up.var.loc[astro5up.var.index.isin(astro_markers), 'n_cells'])

In [77]:
if bool_plot == True:
    # p0down_0up is a ratio; (ratio - 1) * 100 turns it into a percent change.
    pct_change = (p0down_0up-1)*100

    fig = plt.figure(figsize=(9, 5))
    ax = fig.add_axes([0, 0, 1, 1])  # axes span the whole figure canvas
    ax.bar(pct_change.index, pct_change)
    ax.set_xlabel("astrocyte markers")
    ax.set_ylabel("percentage of increase comparing to chow up (%)")
    ax.set_title("Substraction chow_down-chow_up")
    plt.show()

In [78]:
if bool_plot == True:
    # p5down_5up is a ratio; (ratio - 1) * 100 turns it into a percent change.
    pct_change = (p5down_5up-1)*100

    fig = plt.figure(figsize=(9, 5))
    ax = fig.add_axes([0, 0, 1, 1])  # axes span the whole figure canvas
    ax.bar(pct_change.index, pct_change)
    ax.set_xlabel("astrocyte markers")
    ax.set_ylabel("percentage of increase comparing to hfd5 up (%)")
    ax.set_title("Substraction hfd5_down-hfd5_up")
    plt.show()