# Analysis of OXT neurons scRNA-seq data set  

## Table of contents:

* <a href=#Load>Load Packages and Set Global Variables</a>
    * <a href=#Imports>Imports and Settings</a>
    * <a href=#Global>Global Variables</a> 
* <a href=#Dataloading>Loading Data, Quality Control and Preprocessing</a>
    * <a href=#Counts>Gene numbers and counts with and without mitochondrial RNA</a>
* <a href=#Allcells>All cells - normalization, projection and clustering</a>
* <a href=#Define>Define Cell Types</a>
* <a href=#astrocytes>Astrocytes Only</a>
    * <a href=#Embedding>Embeddings and Clustering</a>
    * <a href=#adipmarkers>Astrocyte Marker Analysis</a>
    * <a href=#topde>Top ranking DE Genes</a>
    * <a href=#count_dist>Count distribution for Gfap, Aldh1l1 and Slc1a3</a>
* <a href=#traject>Gfap and Aldh1l1 only</a>

## Load Packages and Set Global Variables

### Imports and Settings

In [1]:
import numpy as np
import pandas as pd
import scanpy as sc
import scipy as sci
import scipy.sparse
import seaborn as sns
import matplotlib as mpl
import custom_functions as cf
import matplotlib.pyplot as plt
from matplotlib import colors
from matplotlib import rcParams
from gprofiler import gprofiler
import statistics
import sys
import re
import os

import batchglm
import diffxpy.api as de
%load_ext autoreload
%autoreload 2


import rpy2.rinterface_lib.callbacks
import logging
from rpy2.robjects import pandas2ri
import anndata2ri
%load_ext rpy2.ipython
anndata2ri.activate()

# --- scanpy verbosity, I/O locations and figure defaults ---------------------
sc.settings.verbosity = 3  # amount of scanpy log output

# NOTE(review): absolute, user-specific paths; the notebook only runs on a machine
# with this directory layout. Consider a single configurable base directory.
dir_in = '/Users/viktorian.miok/Documents/consultation/Tim/snRNA-seq_oxytocin/data/'
dir_out = '/Users/viktorian.miok/Documents/consultation/Tim/snRNA-seq_oxytocin/results/'
dir_tables = dir_out + 'tables/'
# Plain variables used as path prefixes by the cells below; they are NOT assigned to
# sc.settings.figdir / sc.settings.writedir.
# NOTE(review): sc_settings_figdir is not used by any visible cell, so figures written
# via `save=` presumably go to scanpy's default figure directory - confirm.
sc_settings_figdir = dir_out + 'panels/'
sc_settings_writedir = dir_out + 'anndata/'

sc.logging.print_versions()

# One call configures the figure defaults. The earlier
# `sc.settings.set_figure_params(dpi=80, scanpy=True)` call passed a subset of these
# arguments with the same values and was fully superseded by this one, so it was dropped.
sc.set_figure_params(scanpy=True, dpi=80, dpi_save=250, frameon=True, vector_friendly=True,
                     color_map="YlGnBu", format='pdf', transparent=False, ipython_format='png2x')
print(sys.version)



-----
anndata     0.7.5
scanpy      1.7.1
sinfo       0.3.1
-----
PIL                 8.1.2
PyObjCTools         NA
anndata             0.7.5
anndata2ri          1.0.6
appnope             0.1.2
autoreload          NA
backcall            0.2.0
batchglm            v0.7.4
certifi             2020.12.05
cffi                1.14.5
chardet             4.0.0
cloudpickle         1.6.0
colorama            0.4.4
custom_functions    NA
cycler              0.10.0
cython_runtime      NA
dask                2021.03.0
dateutil            2.8.1
decorator           4.4.2
diffxpy             v0.7.4
get_version         2.1
gprofiler           1.0.0
h5py                3.2.1
idna                2.10
igraph              0.9.0
ipykernel           5.4.3
ipython_genutils    0.2.0
ipywidgets          7.6.3
jedi                0.17.2
jinja2              2.11.3
joblib              1.0.1
kiwisolver          1.3.1
legacy_api_wrap     1.2
leidenalg           0.8.3
llvmlite            0.36.0
markupsafe          1.1.1

In [2]:
%%R
# Append a user-level R 4.0 library directory to the R library search path.
# NOTE(review): absolute, user-specific path - will not exist on other machines.
.libPaths( c( .libPaths(), "/Users/viktorian.miok/Library/R/4.0/library") ) 
# Load the scran package into the R session.
library(scran)

R[write to console]: Загрузка требуемого пакета: SingleCellExperiment

R[write to console]: Загрузка требуемого пакета: SummarizedExperiment

R[write to console]: Загрузка требуемого пакета: GenomicRanges

R[write to console]: Загрузка требуемого пакета: stats4

R[write to console]: Загрузка требуемого пакета: BiocGenerics

R[write to console]: Загрузка требуемого пакета: parallel

R[write to console]: 
Присоединяю пакет: ‘BiocGenerics’


R[write to console]: Следующие объекты скрыты от ‘package:parallel’:

    clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
    clusterExport, clusterMap, parApply, parCapply, parLapply,
    parLapplyLB, parRapply, parSapply, parSapplyLB


R[write to console]: Следующие объекты скрыты от ‘package:stats’:

    IQR, mad, sd, var, xtabs


R[write to console]: Следующие объекты скрыты от ‘package:base’:

    Filter, Find, Map, Position, Reduce, anyDuplicated, append,
    as.data.frame, basename, cbind, colnames, dirname, do.call,
    duplicated, ev

In [3]:
# Custom colormap `mymap` for the expression UMAPs below:
# 20 steps taken from a narrow slice (0.7-0.8) of reversed Greys, followed by
# 128 steps covering the full Reds colormap.
colors3 = plt.cm.Greys_r(np.linspace(0.7, 0.8, 20))
colors2 = plt.cm.Reds(np.linspace(0, 1, 128))
colorsComb = np.concatenate([colors3, colors2], axis=0)
mymap = colors.LinearSegmentedColormap.from_list(name='my_colormap', colors=colorsComb)

In [4]:
# Print the version of diffxpy (imported above as `de`).
print(de.__version__)

v0.7.4


## Global variables

All embeddings and clusterings can be saved and loaded into this script. Be careful about overwriting cluster caches once cell type annotation has started, because re-clustering may shuffle the cluster labels.

Set whether anndata objects are recomputed or loaded from cache.

In [5]:
# NOTE(review): the cells below run their recompute branch under
# `if bool_recomp == False:` and read cached .h5ad files otherwise, so as written
# True means "load from cache" - inverted relative to the flag's name; confirm intent.
bool_recomp = True

Set whether clustering is recomputed or loaded from saved .obs file. Loading makes sense if the clustering changes due to a change in scanpy or one of its dependencies and the number of clusters or the cluster labels change accordingly.

In [6]:
# When True, the clustering cell recomputes Leiden clusters; otherwise it reads the
# cluster labels from the saved obs CSV.
bool_recluster = True

Set whether cluster cache is overwritten. Note that the cache exists for reproducibility of clustering, see above.

In [7]:
# Intended to control whether the cluster-label cache (the saved obs CSV) is overwritten.
bool_write_cluster_cache = False

Set whether to produce plots, set to False for test runs.

In [8]:
# NOTE(review): the plotting cells below are guarded by `if bool_plot == False:`,
# so with True they are skipped and with False they plot - the opposite of the
# description above; confirm intent.
bool_plot = True

## Load data

In [9]:
# Load the raw count matrix and compute per-cell QC totals, or read the cached result.
# NOTE(review): the recompute branch runs when bool_recomp is False (inverted relative
# to the flag's name) - confirm intent.
if bool_recomp == False:
    #adata = sc.read_excel("~/snRNAseq_data.xlsx", sheet='expression').T
    #adata.obs = pd.read_excel("~/snRNAseq_data.xlsx", sheet_name='observations')
    adata_r = sc.read(dir_in + 'adata_raw.h5ad')
    adata_r.var.index = list(adata_r.var['ensmbl_id'])

    # ERCC spike-ins: every feature that is neither an Ensembl ID ("ENS...") nor a
    # "__"-prefixed row (presumably counting-summary rows - verify against the input).
    ERCC = [name for name in adata_r.var_names if not name.startswith(("ENS", "__"))]
    adataERCC = adata_r[:, ERCC].copy()

    # endogenous transcripts: features whose ID starts with "ENS"
    transc = [name for name in adata_r.var_names if name.startswith("ENS")]
    adataT = adata_r[:, transc].copy()

    # Per-cell QC metrics. np.asarray(...).ravel() yields a flat 1-D vector even when
    # .X is a scipy sparse matrix, whose .sum(axis=1) returns an (n, 1) np.matrix.
    adata_r.obs['n_counts'] = np.asarray(adata_r.X.sum(axis=1)).ravel()  # endogenous transcripts + ERCCs
    adata_r.obs['n_counts_transcripts'] = np.asarray(adataT.X.sum(axis=1)).ravel()  # endogenous transcripts only
    adata_r.obs['n_counts_ERCC'] = np.asarray(adataERCC.X.sum(axis=1)).ravel()  # ERCCs only
    adata_r.obs['n_genes'] = np.asarray((adata_r.X > 0).sum(axis=1)).ravel()  # number of detected features

    sc.write(sc_settings_writedir+'adata_r.h5ad', adata_r)
else:
    adata_r = sc.read(sc_settings_writedir+'adata_r.h5ad')

In [10]:
# Violin plots of the per-cell QC metrics before filtering.
# NOTE(review): runs only when bool_plot is False - confirm intent.
if not bool_plot:
    qc_keys = ['n_genes', 'n_counts', 'n_counts_ERCC', 'n_counts_transcripts']
    sc.pl.violin(adata=adata_r, keys=qc_keys, jitter=0.4, multi_panel=True, save=True)

## Filtering

In [11]:
# Histograms of the per-cell QC metrics, one panel per metric.
# NOTE(review): runs only when bool_plot is False - confirm intent.
if not bool_plot:
    qc_metrics = ("n_genes", "n_counts", "n_counts_transcripts", "n_counts_ERCC")
    fig, axes = plt.subplots(ncols=4, figsize=(15,5))
    for ax, metric in zip(axes, qc_metrics):
        ax.hist(adata_r.obs[metric], bins=100, color='c', edgecolor='k', alpha=0.65)
        if metric == "n_genes":
            # dashed line marks the lower n_genes cutoff (1000) applied in the filtering cell
            ax.axvline(1000, color='k', linestyle='dashed', linewidth=1)
        ax.set_title(metric)
    plt.show()

In [12]:
# Filter cells and genes, or read the cached filtered object.
# NOTE(review): the recompute branch runs when bool_recomp is False - confirm intent.
if not bool_recomp:
    # drop cells without any detected gene / any count
    sc.pp.filter_cells(adata_r, min_genes=1)
    sc.pp.filter_cells(adata_r, min_counts=1)

    # main cell and gene filters
    sc.pp.filter_cells(adata_r, min_genes=1000)
    sc.pp.filter_genes(adata_r, min_cells=25)  # roughly 1% of the population
    sc.pp.filter_genes(adata_r, min_counts=250)

    # further filtering on a copy, so adata_r keeps the pre-threshold state
    adata_qc = adata_r.copy()
    genes_per_cell = adata_qc.obs['n_genes']
    transcripts_per_cell = adata_qc.obs['n_counts_transcripts']
    keep_cell = (
        (genes_per_cell < 4500)               # fewer than 4500 genes (cutoff was 7000 previously)
        & (transcripts_per_cell > 10000)      # more than 10,000 transcript counts
        & (transcripts_per_cell < 200000)     # fewer than 200,000 transcript counts (was 300,000 previously)
    )
    adata_qc = adata_qc[keep_cell, :]

    sc.write(sc_settings_writedir+'adata_qc.h5ad', adata_qc)
else:
    adata_qc = sc.read(sc_settings_writedir+'adata_qc.h5ad')

In [13]:
# Violin plots of the per-cell QC metrics after filtering.
# NOTE(review): runs only when bool_plot is False - confirm intent.
if not bool_plot:
    qc_keys = ['n_genes','n_counts','n_counts_ERCC','n_counts_transcripts']
    sc.pl.violin(adata=adata_qc, keys=qc_keys, jitter=0.4, multi_panel=True, save=True)

## Normalization - scran

In [14]:
# Size-factor normalization of the filtered data.
# NOTE(review): runs only when bool_recomp is False (inverted relative to the flag's
# name), and this cell has no cache/else branch - confirm intent.
if bool_recomp == False:
    # endogenous transcripts (features whose ID starts with "ENS") are normalized ...
    red_trans = [name for name in adata_qc.var_names if name.startswith("ENS")]
    adata = adata_qc[:, red_trans].copy()

    # ... ERCC spike-ins are split into a separate object
    ERCC = [name for name in adata_qc.var_names if not name.startswith(("ENS", "__"))]
    adataERCC = adata_qc[:, ERCC].copy()

    # transposed matrices, prepared as input for the R step below
    data_mat = adata.X.T
    #input_groups = adata_pp.obs['groups']
    erccs = adataERCC.X.T
    ############################################################################
    # run R code in separate cell
    #%%R -i data_mat -i erccs  -o size_factors -o spike_factors  #-i input_groups

    #sce <- SingleCellExperiment(list(counts=data_mat))
    #sce2 <- SingleCellExperiment(list(counts=erccs))
    #altExp(sce, "ERCC") <- sce2

    #sce = computeSumFactors(sce, min.mean=0.001)   # clusters=input_groups,
    #size_factors = sizeFactors(sce)
    #sce2 <- computeSpikeFactors(sce, "ERCC")
    #spike_factors = sizeFactors(sce2)
    ###########################################################################
    # NOTE(review): `size_factors` is not defined anywhere in this cell; it has to be
    # produced by running the commented-out R code above in a separate %%R cell first,
    # otherwise the next line raises NameError.
    adata.obs['size_factors'] = size_factors

    # Visualize the estimated size factors (a duplicated scatter call was removed)
    sc.pl.scatter(adata, 'size_factors', 'n_counts_transcripts')
    sc.pl.scatter(adata, 'size_factors', 'n_genes')
    sns.distplot(size_factors, bins=50, kde=False)

    # Normalize: divide every cell by its size factor, keep the normalized counts as a
    # layer, then log1p-transform .X
    adata.X /= adata.obs['size_factors'].values[:,None]
    adata.layers["norm_counts"] = adata.X.copy()
    adata.obs['n_counts_norm'] = adata.X.sum(1)
    sc.pp.log1p(adata)

    # Replace the var table by an empty frame indexed by gene name (the intermediate
    # re-indexing / column selection that was immediately overwritten is dropped)
    adata.var = pd.DataFrame(index=list(adata.var['gene_name']))

    # keep the log-normalized data (before the count filter below) as .raw
    adata.raw = adata
    sns.distplot(adata.obs["n_counts_norm"], bins=50, kde=False)

    # Filtering of normalized counts
    adata = adata[adata.obs["n_counts_norm"] < 200000]

### UMAP visualisation after normalisation

In [15]:
# PCA, neighbour graph, t-SNE, UMAP and Leiden clustering of the normalized data,
# or read the cached result; PAGA is computed in both cases.
# NOTE(review): the recompute branch runs when bool_recomp is False (inverted relative
# to the flag's name) - confirm intent.
if bool_recomp == False:
    sc.pp.pca(adata, n_comps=15)
    sc.pp.neighbors(adata, n_neighbors=50)
    sc.tl.tsne(adata, perplexity=30, n_pcs=15)
    sc.tl.umap(adata)

    cluster_cache = sc_settings_writedir+'obs_adata_astro.csv'
    if bool_recluster == True:
        # Leiden runs exactly once (it used to run twice with identical arguments)
        sc.tl.leiden(adata, resolution=0.5)
        # Only overwrite the cluster cache when explicitly requested, as described for
        # bool_write_cluster_cache in the global-variables section.
        if bool_write_cluster_cache:
            pd.DataFrame(adata.obs).to_csv(path_or_buf=cluster_cache)
    else:
        # The cache was written with the cell names as first column. Read them back as
        # the index and look the labels up by cell name: assigning a Series with a
        # default 0..n-1 index would align on index labels and fill the column with NaN.
        obs = pd.read_csv(cluster_cache, index_col=0)
        obs.index = obs.index.astype(str)
        cached_labels = obs.loc[adata.obs_names, 'leiden']  # KeyError if a cell is missing from the cache
        adata.obs['leiden'] = pd.Categorical(cached_labels.astype(str).values)

    sc.write(sc_settings_writedir+'adata.h5ad', adata)
else:
    adata = sc.read(sc_settings_writedir+'adata.h5ad')
sc.tl.paga(adata)

Variable names are not unique. To make them unique, call `.var_names_make_unique`.
running PAGA
    finished: added
    'paga/connectivities', connectivities adjacency (adata.uns)
    'paga/connectivities_tree', connectivities subtree (adata.uns) (0:00:00)


In [19]:
# UMAPs of the normalized (not batch-corrected) data, coloured by QC and metadata columns.
# NOTE(review): runs only when bool_plot is False - confirm intent.
if not bool_plot:
    for color_keys in (["n_counts_norm","n_genes","condition"],
                       ["leiden","experiment","plate"]):
        sc.pl.umap(adata, color=color_keys)

## Batch correction

In [17]:
# Batch-correct a copy of `adata` and recompute embeddings and clustering on it, or
# read the cached result; PAGA is computed in both cases.
# NOTE(review): the recompute branch runs when bool_recomp is False (inverted relative
# to the flag's name) - confirm intent.
if bool_recomp == False:
    # work on a copy; the uncorrected object is attached as .raw
    adata_f = adata.copy()
    adata_f.raw = adata
    # ComBat with "plate" as the batch key ("experiment" is noted as an alternative)
    sc.pp.combat(adata_f, key="plate") #  experiment plate
    # same embedding / clustering parameters as used for the uncorrected data
    sc.pp.pca(adata_f, n_comps=15)
    sc.pp.neighbors(adata_f, n_neighbors=50)
    sc.tl.tsne(adata_f, perplexity=30, n_pcs=15)
    sc.tl.umap(adata_f)
    sc.tl.leiden(adata_f, resolution=0.5)
    sc.write(sc_settings_writedir+'adata_f.h5ad', adata_f)
else:
    adata_f = sc.read(sc_settings_writedir+'adata_f.h5ad') 
# PAGA on the object from either branch
sc.tl.paga(adata_f)

Variable names are not unique. To make them unique, call `.var_names_make_unique`.
running PAGA
    finished: added
    'paga/connectivities', connectivities adjacency (adata.uns)
    'paga/connectivities_tree', connectivities subtree (adata.uns) (0:00:00)


## Visualisation
#### UMAP

In [21]:
# UMAPs of the batch-corrected data, coloured by QC and metadata columns.
# NOTE(review): runs only when bool_plot is False - confirm intent.
if not bool_plot:
    for color_keys in (["n_counts_norm","n_genes","condition"],
                       ["leiden","experiment","plate"]):
        sc.pl.umap(adata_f, color=color_keys)

#### UMAP of hormone receptor genes

In [22]:
# UMAP coloured by expression (from .X, not .raw) of the listed genes; saved with suffix "_all_HR.pdf".
# NOTE(review): runs only when bool_plot is False - confirm intent.
if not bool_plot:
    hr_umap_genes = ['Cckar','Insr','Lepr','Glp1r','Chrnb2','Oxt','Avp']
    sc.pl.umap(adata_f, color=hr_umap_genes, use_raw=False,
               color_map=mymap, save="_all_HR.pdf", size=30)

#### UMAP of Prader-Willi syndrome genes

In [80]:
# UMAP coloured by expression (from .X, not .raw) of the listed genes; saved with suffix "_all_PWS.pdf".
# NOTE(review): runs only when bool_plot is False - confirm intent.
if not bool_plot:
    pws_umap_genes = ['Trappc9','Sim1','Omg','Snrpn','Sertad4','Asap2','Dok6',
                      'Sacs','Abcd2','Camk2g','Magel2','Arnt','Otp','Pou3f2','Nr2c2']
    sc.pl.umap(adata_f, color=pws_umap_genes, use_raw=False,
               color_map=mymap, save="_all_PWS.pdf", size=30)

In [23]:
# Split the batch-corrected data by diet: adata_0 = CHOW cells, adata_1 = HFD cells.
# Explicit .copy(): both objects are modified in place afterwards (var_names_make_unique
# here, new obs columns in the plotting cells below), so they should be independent
# objects rather than views of adata_f.
adata_0 = adata_f[adata_f.obs['condition'] == 'CHOW', :].copy()
adata_1 = adata_f[adata_f.obs['condition'] == 'HFD', :].copy()
adata_0.var_names_make_unique()
adata_1.var_names_make_unique()

  res = method(*args, **kwargs)
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


#### Binary UMAP of Cckar expressing cells

In [34]:
# Binary Cckar annotation (expressed in .raw vs. not) and UMAP for all / chow / hfd cells.
# NOTE(review): runs only when bool_plot is False, and it also writes obs['cckar'],
# which the bar plot cell below reads - confirm intent.
if bool_plot == False:
    for subset, title in ((adata_f, 'Cckar_all'),    # all cells
                          (adata_0, 'Cckar_chow'),   # chow cells
                          (adata_1, 'Cckar_hfd')):   # hfd cells
        # raw Cckar values as a flat per-cell vector (dense or sparse storage)
        cckar_raw = subset.raw[:, 'Cckar'].X
        if scipy.sparse.issparse(cckar_raw):
            cckar_raw = cckar_raw.toarray()
        cckar_raw = np.asarray(cckar_raw).ravel()

        # Explicit labels with a fixed category order, so the palette below always maps
        # 'none' -> silver and 'cckar' -> red, even if one group is empty in a subset.
        labels = np.where(cckar_raw > 0, 'cckar', 'none')
        subset.obs['cckar'] = pd.Categorical(labels, categories=['none', 'cckar'])

        sc.pl.umap(subset,
                   color=['cckar'],
                   use_raw=False,
                   title=title,
                   palette=['silver','red'],
                   size=30
        )

#### Barplot compare diets

In [29]:
# Bar plot: percent change of the per-category cell counts, HFD (adata_1) relative to
# CHOW (adata_0), for the current obs['cckar'] annotation.
# NOTE(review): runs only when bool_plot is False - confirm intent.
if bool_plot == False:
    fig = plt.figure()
    fig.set_size_inches(2, 3)
    ax = fig.add_axes([0,0,1,1])
    # the division aligns both count Series on the category labels
    students = (adata_1.obs['cckar'].value_counts()/adata_0.obs['cckar'].value_counts()-1)*100
    # Take the bar labels from the same Series as the bar heights. They used to come
    # from adata_f's value_counts order, which need not match the order of `students`.
    langs = [str(category) for category in students.index]
    ax.bar(langs, students.values)
    plt.ylabel("percent of increase comparing to chow (%)")
    plt.title("Substraction hfd - chow")
    plt.show()

#### Dotplot of clusters using markers

In [31]:
# Dot plots of the cluster marker genes per Leiden cluster, for CHOW (adata_0) and
# HFD (adata_1) cells.
# NOTE(review): runs only when bool_plot is False - confirm intent.
if not bool_plot:
    marker_genes_clust = {
        'cluster 1': ['Ecel1','Efnb1','Homer1','Ndufaf1','Trpm7','Rptor','Dcaf13'],
        'cluster 2': ['Fam19a1','Ptger4','Eid2b','Msh2','Haus2'], #,'BC056474'
        'cluster 3': ['Fam19a1','Sim1','Siah1a'], # ,'Rfxp3','Gpr58','Ghsr'
        'cluster 4': ['Th','Serpinb6a','Cacna1g','Dnah8','Slc8a3','Pou3f2'],
        'cluster 5': ['Pmch','Slc17a6'],  # 'Penk'
    }

    # sort the genes of every cluster in place
    for cluster_genes in marker_genes_clust.values():
        cluster_genes.sort()

    for diet_subset in (adata_0, adata_1):
        sc.pl.dotplot(adata=diet_subset,
                      var_names=marker_genes_clust,
                      groupby='leiden',
                      use_raw=False,
                      log=False,
                      vmax=2.15,
                      show=True
        )

#### High-low UMAP of Cckar expressing cells

In [35]:
# Three-level Cckar annotation from .raw (high: >= 2, low: between 0 and 2, none: 0)
# and UMAP for all / chow / hfd cells.
# Fix: the titles were attached to the wrong objects (the CHOW subset adata_0 was titled
# 'Cckar_all', the HFD subset adata_1 'Cckar_chow' and the full adata_f 'Cckar_hfd').
# They now follow the split defined earlier (adata_0 = CHOW, adata_1 = HFD), as in the
# binary Cckar cell.
# NOTE(review): runs only when bool_plot is False, and it also overwrites obs['cckar'],
# which the bar plot cell below reads - confirm intent.
if bool_plot == False:
    for subset, title in ((adata_f, 'Cckar_all'),    # all cells
                          (adata_0, 'Cckar_chow'),   # chow cells
                          (adata_1, 'Cckar_hfd')):   # hfd cells
        # raw Cckar values as a flat per-cell vector (dense or sparse storage)
        cckar_raw = subset.raw[:, 'Cckar'].X
        if scipy.sparse.issparse(cckar_raw):
            cckar_raw = cckar_raw.toarray()
        cckar_raw = np.asarray(cckar_raw).ravel()

        # first matching condition wins; everything not > 0 is 'none'
        levels = np.select([cckar_raw >= 2, cckar_raw > 0],
                           ['high_cckar', 'low_cckar'],
                           default='none')
        # fixed category order keeps the palette mapping stable:
        # high -> blue, low -> magenta, none -> silver
        subset.obs['cckar'] = pd.Categorical(levels,
                                             categories=['high_cckar', 'low_cckar', 'none'])

        sc.pl.umap(subset,
                   color=['cckar'],
                   use_raw=False,
                   palette=['blue','magenta','silver'],
                   title=title,
                   size=30
        )

#### Barplot compare diets

In [36]:
# Bar plot: percent change of the per-category cell counts, HFD (adata_1) relative to
# CHOW (adata_0), for the current obs['cckar'] annotation.
# NOTE(review): runs only when bool_plot is False - confirm intent.
if bool_plot == False:
    fig = plt.figure()
    fig.set_size_inches(3, 3)
    ax = fig.add_axes([0,0,1,1])
    # the division aligns both count Series on the category labels
    students = (adata_1.obs['cckar'].value_counts()/adata_0.obs['cckar'].value_counts()-1)*100
    # Take the bar labels from the same Series as the bar heights. They used to come
    # from adata_f's value_counts order, which need not match the order of `students`.
    langs = [str(category) for category in students.index]
    ax.bar(langs, students.values)
    plt.ylabel("percent of increase comparing to chow (%)")
    plt.title("Substraction hfd - chow")
    plt.show()

#### Prepare data for UpSet plot

In [51]:
# Build 0/1 indicator columns in adata_f.obs (marker-defined clusters, Cckar
# co-expression, single-gene expression) and export the obs table as CSV.
# NOTE(review): runs only when bool_recomp is False (inverted relative to the flag's
# name) - confirm intent.
if bool_recomp == False:
    hr=['Cckar','Insr','Lepr','Glp1r','Chrnb2','Adora1','Ntrk2','Ntsr2','Oprk1','Chrm2','Adcyap1','Esr1']
    ct=['Th','Nucb2','Slc17a6','Spx','Gal','Glud1']
    pw=['Trappc9','Sim1','Syt4','Omg','Snrpn','Sertad4','Asap2','Dok6','Sacs','Abcd2','Phkb','Camk2g','Magel2','Arnt',
        'Otp','Pou3f2','Nr2c2','Fto']

    def _raw_values(gene):
        """Return the .raw values of `gene` in adata_f as a flat per-cell vector."""
        values = adata_f.raw[:, gene].X
        if scipy.sparse.issparse(values):
            values = values.toarray()
        return np.asarray(values).ravel()

    # clusters defined by marker combinations; the boolean mask is built first and
    # converted to 0/1 afterwards (the conversion used to bind only to the last term)
    cluster_masks = {
        'clust_1': (_raw_values('Trpm7') > 0) & (_raw_values('Dcaf13') > 0)
                   & (_raw_values('Efnb1') == 0) & (_raw_values('Ndufaf1') == 0),
        'clust_2': (_raw_values('Fam19a1') > 0) & (_raw_values('Ptger4') > 0),
        'clust_3': (_raw_values('Fam19a1') > 0) & (_raw_values('Sim1') > 0),
        'clust_4': (_raw_values('Th') > 0) & (_raw_values('Slc8a3') > 0) & (_raw_values('Pou3f2') > 0),
        'clust_5': (_raw_values('Pmch') > 0) & (_raw_values('Slc17a6') > 0),
    }
    for column, mask in cluster_masks.items():
        adata_f.obs[column] = mask * 1

    # Genes combined with Cckar, listed in the order in which their columns were
    # created before (hormone receptors, co-transmitters, Prader-Willi syndrome/exocytosis)
    cckar_partners = [
        'Lepr','Insr','Glp1r','Chrnb2','Adora1','Ntrk2','Ntsr2','Oprk1','Chrm2','Adcyap1','Esr1',
        'Nucb2','Th','Slc17a6','Spx','Gal','Glud1',
        'Trappc9','Sim1','Syt4','Omg','Snrpn','Sertad4','Asap2','Dok6','Sacs','Abcd2','Phkb',
        'Camk2g','Magel2','Arnt','Otp','Pou3f2','Nr2c2','Fto',
    ]
    # sanity check: same gene set as the three lists above
    assert set(cckar_partners) | {'Cckar'} == set(hr + ct + pw)

    expressed = {gene: _raw_values(gene) > 0 for gene in ['Cckar'] + cckar_partners}

    # co-expression with Cckar: columns 'cckar_<gene>'
    for gene in cckar_partners:
        adata_f.obs['cckar_' + gene.lower()] = (expressed['Cckar'] & expressed[gene]) * 1

    # single-gene expression: columns '<gene>' (this overwrites any existing 'cckar' column)
    for gene in ['Cckar'] + cckar_partners:
        adata_f.obs[gene.lower()] = expressed[gene] * 1

    # NOTE(review): written to the current working directory, not to dir_tables - confirm.
    adata_f.obs.to_csv("adata_f.obs.csv")

#### UMAP of clusters defined by markers

In [38]:
# UMAPs of the marker-defined cluster indicator columns, CHOW cells only.
# NOTE(review): runs only when bool_plot is False - confirm intent.
if not bool_plot:
    for cluster_keys in (['clust_1','clust_2','clust_3'], ['clust_4','clust_5']):
        sc.pl.umap(adata_f[adata_f.obs['condition']=='CHOW',],
                   color=cluster_keys,
                   use_raw=False,
                   palette=['blue', 'silver'],
                   size=30
        )

In [37]:
# UMAPs of the marker-defined cluster indicator columns, HFD cells only.
# NOTE(review): runs only when bool_plot is False - confirm intent.
if not bool_plot:
    for cluster_keys in (['clust_1','clust_2','clust_3'], ['clust_4','clust_5']):
        sc.pl.umap(adata_f[adata_f.obs['condition']=='HFD',],
                   color=cluster_keys,
                   use_raw=False,
                   palette=['blue', 'silver'],
                   size=30
        )