# X-Inactivation Cell Type Differences DV Analysis

Detection of X-inactivation via differential variance

No detectable differences.

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import scanpy.api as sc
import scipy as sp
import itertools
import numpy as np
import scipy.stats as stats
from scipy.integrate import dblquad
import scipy.sparse as sparse
import seaborn as sns
import imp
import time
from statsmodels.stats.multitest import fdrcorrection

  from ._conv import register_converters as _register_converters


In [2]:
# Import from the public IPython.display module; pulling `display` out of the
# internal IPython.core.display module is deprecated.
from IPython.display import display, HTML

# Inject CSS so the notebook container spans the full browser width.
display(HTML("<style>.container { width:100% !important; }</style>"))

In [3]:
import warnings

# Install a blanket "ignore" filter: every warning raised after this cell is suppressed.
warnings.filterwarnings(action='ignore')

In [4]:
import os

# Directory that holds the inputs and the precomputed statistics read by the cells below.
# Set the X_INACTIVATION_DATA_PATH environment variable to point somewhere else; when it
# is unset, the original author's local folder is used.
# os.path.join(..., '') guarantees a trailing separator, because later cells build file
# names by plain string concatenation (data_path + 'file').
data_path = os.path.join(
    os.environ.get(
        'X_INACTIVATION_DATA_PATH',
        '/Users/mincheolkim/Google Drive/UCSF/research/parameter_estimation/x_inactivation_data/'),
    '')

### Read cell type list

In [5]:
# The cell-type labels are the values of the 'ct_cov' column of the list file.
ct_table = pd.read_csv(data_path + 'lupus_ct_list.csv')
ct_list = ct_table['ct_cov'].tolist()

### Read the AnnData object

In [6]:
# adata.var.index is used further down to label the rows of the p-value tables.
adata_file = data_path + 'lupus_annotated_nonorm_V6_x_genes.h5ad'
adata = sc.read(adata_file)

### Read DE and DV results

The results were computed by the `compute_x_inactivation_statistics.py` script on the Wynton cluster.

In [7]:
def _load_pval_frame(sex_code, stat):
    """Load one table of precomputed p-values, one column per cell type.

    Parameters
    ----------
    sex_code : int
        Prefix of the file names: 1 for the female tables, 0 for the male tables.
    stat : str
        'de' or 'dv'; selects which family of p-value files is read.

    Returns
    -------
    pandas.DataFrame
        Rows follow adata.var.index and columns follow ct_list.
    """
    per_ct_pvals = [
        np.load(data_path + 'ct_statistics/{}_{}_{}_pvals.npy'.format(sex_code, ct, stat))
        for ct in ct_list]

    # Stack into (cell types x variables), label both axes, then transpose so that
    # cell types end up as columns.
    return pd.DataFrame(
        data=np.vstack(per_ct_pvals),
        index=ct_list,
        columns=adata.var.index).T


female_de_pval = _load_pval_frame(1, 'de')
female_dv_pval = _load_pval_frame(1, 'dv')
male_de_pval = _load_pval_frame(0, 'de')
male_dv_pval = _load_pval_frame(0, 'dv')

### FDR correction

In [8]:
def fdr_correct_df(df):
    """Return a copy of `df` with every column replaced by FDR-corrected p-values.

    Each column is corrected on its own with `fdrcorrection`. NaN entries are left
    out of the correction and stay NaN in the result. The input frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Table of raw p-values; NaN marks entries without a p-value.

    Returns
    -------
    pandas.DataFrame
        Same index and columns as `df`, holding the corrected values.
    """
    corrected = df.copy()

    for name in df.columns:

        raw = df[name].values
        observed = ~np.isnan(raw)

        # Start from all-NaN so that positions without a p-value remain NaN.
        adjusted = np.full_like(raw, np.nan, dtype=np.double)

        # fdrcorrection returns (reject mask, corrected p-values); only the second
        # element is kept, so alpha matters only for the discarded mask.
        adjusted[observed] = fdrcorrection(raw[observed], alpha=0.2)[1]
        corrected[name] = adjusted

    return corrected

In [9]:
# Apply the per-column FDR correction to each of the four p-value tables.
female_de_fdr, female_dv_fdr, male_de_fdr, male_dv_fdr = (
    fdr_correct_df(pval_table)
    for pval_table in (female_de_pval, female_dv_pval, male_de_pval, male_dv_pval)
)

### Find female specific differentially variable genes

In [21]:
# An entry is a hit when the female DV FDR is below the cutoff while the male DV FDR
# is above the floor. NaN FDR values compare False on both sides, so they never
# count as hits.
female_fdr_cutoff = 0.1
male_fdr_floor = 0.3

female_dv_hits = (
    ((female_dv_fdr < female_fdr_cutoff) & (male_dv_fdr > male_fdr_floor))
    # Number of cell types in which each row is a hit.
    .assign(num_ct_hits=lambda hits: hits.sum(axis=1))
    # Keep only rows that are a hit in at least one cell type.
    .query('num_ct_hits > 0')
)

In [22]:
# Column totals: hits per cell type, plus the overall total in the num_ct_hits column.
female_dv_hits.sum(axis=0)

Tc              0.0
Th              0.0
cM             11.0
NK              0.0
B               0.0
ncM             0.0
pDC             0.0
MK              0.0
DB              0.0
cDC             2.0
ProlifT         0.0
Progen          0.0
num_ct_hits    13.0
dtype: float64

In [23]:
# Last ten rows after sorting by hit count in descending order.
# NOTE(review): descending sort followed by a tail shows the rows with the FEWEST hits;
# confirm that the head of the sorted table was not the intended view.
female_dv_hits.sort_values(by='num_ct_hits', ascending=False).iloc[-10:]

Unnamed: 0_level_0,Tc,Th,cM,NK,B,ncM,pDC,MK,DB,cDC,ProlifT,Progen,num_ct_hits
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
GDI1,False,False,True,False,False,False,False,False,False,False,False,False,1
WDR13,False,False,False,False,False,False,False,False,False,True,False,False,1
NAP1L6,False,False,True,False,False,False,False,False,False,False,False,False,1
MTRNR2L10,False,False,True,False,False,False,False,False,False,False,False,False,1
FUNDC2,False,False,True,False,False,False,False,False,False,False,False,False,1
PHKA1,False,False,True,False,False,False,False,False,False,False,False,False,1
LAGE3,False,False,True,False,False,False,False,False,False,False,False,False,1
GABRE,False,False,True,False,False,False,False,False,False,False,False,False,1
XK,False,False,True,False,False,False,False,False,False,False,False,False,1
GNL3L,False,False,True,False,False,False,False,False,False,False,False,False,1


### Save these hits

In [69]:
# Write the hit table (row index included) next to the input data.
hits_csv = data_path + 'female_specific_ct_dv.csv'
female_dv_hits.to_csv(hits_csv)