# Yada Deconvolution

---



- Run the following cells for deconvolution using Yada.

## 1 - Import prerequisites

In [2]:
%load_ext autoreload
%autoreload 2
from IPython.display import FileLink, FileLinks
import pandas as pd

pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 10000)

!pip install -q tslearn
!pip -q install gseapy
!git clone https://github.com/zurkin1/Yada.git
!mv Yada/* .
from yada import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
[K     |████████████████████████████████| 793 kB 8.3 MB/s 
[K     |████████████████████████████████| 526 kB 6.9 MB/s 
[K     |████████████████████████████████| 202 kB 55.8 MB/s 
[K     |████████████████████████████████| 44 kB 2.7 MB/s 
[K     |████████████████████████████████| 47 kB 4.2 MB/s 
[K     |████████████████████████████████| 144 kB 60.5 MB/s 
[K     |████████████████████████████████| 5.8 MB 32.4 MB/s 
[K     |████████████████████████████████| 251 kB 55.4 MB/s 
[K     |████████████████████████████████| 127 kB 69.0 MB/s 
[?25h  Building wheel for bioservices (setup.py) ... [?25l[?25hdone
  Building wheel for easydev (setup.py) ... [?25l[?25hdone
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
datascience 0.10.6 requires folium==0.2.1, but you have f

## 2 - Configure Input Files.

Example input files are in the ./data/ folder.


In [6]:
# Mixture file (CSV). Layout: one column per mixture (mix1, mix2, ...), one row per gene name.
mix = "./data/DSA/mix.csv"

# Reference matrix file. It should be normalized in the same way as the mixture data.
pure = "./data/DSA/pure.csv"

# File holding the true cell type proportions.
labels = "./data/DSA/labels.csv"

## 3 - Run deconvolution.

In [7]:
# Select the deconvolution method by changing the function called below.
# Available methods: run_dtw_deconv (recommended for Microarray), cibersort, dsection,
# lasso, nnls_deconv_constrained, pxcell (recommended for RNASeq).
# NOTE(review): the other methods may take different arguments (pxcell is called with
# the mixture file only in section 5) - confirm before swapping.
result = run_dtw_deconv(mix, pure)
# Being the last expression of the cell, `result` is displayed as the cell output.
result

# Optional: download the result. Uncomment FileLink for a local Jupyter server,
# or the two google.colab lines when running on Colab.
# NOTE(review): presumably the run writes data/results.csv - confirm.
#FileLink('data/results.csv')
#from google.colab import files
#files.download('data/results.csv')

Deconvolution, num_cells: 3, num_mixes: 11
 99%

Unnamed: 0,liver,brain,lung
0,6.460968e-18,0.3167661,0.5552969
1,0.4162209,3.107207e-18,0.4454296
2,0.2330463,0.5609162,0.1292747
3,0.4082744,0.3874963,0.1857644
4,0.333844,0.4807197,0.2459628
5,0.3738905,0.3094182,0.4195339
6,0.3562263,0.3977073,0.3715767
7,0.3697132,0.4013365,0.3234827
8,0.3516907,0.4588357,0.2487138
9,0.3808354,0.4351044,0.1587021


## 4 - Evaluate Results.

Run this step only if the true cell type proportions are available.

In [8]:
# Evaluate `result` (the deconvolution output) against `labels` (the true proportions file).
# NOTE(review): appears to report one line per cell type, including a Spearman
# correlation - confirm the exact metrics in yada.
calc_corr(labels, result)

liver, 0.9550017957569228, SpearmanrResult(correlation=0.9931584145359876, pvalue=1.05041318021364e-09)
brain, 0.8677283779011553, SpearmanrResult(correlation=0.986311652350802, pvalue=2.3575474558163253e-08)
lung, 0.852423140518431, SpearmanrResult(correlation=0.9862693712811287, pvalue=2.390348942277324e-08)


## 5 - Another Example With RNASeq.

This example uses xCell (Aran, Hu and Butte, "xCell: digitally portraying the tissue cellular heterogeneity landscape", Genome Biology (2017) 18:220).

In [None]:
# Mixture file for the RNASeq example. Layout: one column per mixture (mix1, mix2, ...), one row per gene name.
# Note: this rebinds `mix`, so re-running the section 3 cell after this one would
# pair this file with the DSA reference matrix.
mix = './data/Challenge/input/mix-107019.csv'

# True cell type proportions file (left disabled for this example).
#labels = './data/Abbas/labels.csv'

# pxcell is called with the mixture file only.
pxcell(mix)

Number of samples: 20, number of gene sets: 489


Unnamed: 0_level_0,mix0,mix1,mix2,mix3,mix4,mix5,mix6,mix7,mix8,mix9,mix10,mix11,mix12,mix13,mix14,mix15,mix16,mix17,mix18,mix19
cell_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
Adipocytes,0.000000,0.002355,0.000000,0.000104,0.000000,0.000000,0.000003,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.003059,0.002760
Astrocytes,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
B-cells,0.060759,0.045040,0.053219,0.065533,0.000000,0.104229,0.000000,0.133439,0.080371,0.000947,0.111579,0.033880,0.000000,0.133060,0.112339,0.045254,0.058061,0.000000,0.000000,0.022430
Basophils,0.000000,0.000000,0.000000,0.025438,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.030487,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
CD4+ T-cells,0.000000,0.000000,0.064289,0.000000,0.000000,0.044092,0.000000,0.058988,0.047214,0.047096,0.000000,0.031886,0.000000,0.000000,0.008516,0.000024,0.102259,0.081592,0.001961,0.046897
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ly Endothelial cells,0.000524,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000192,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
mv Endothelial cells,0.000188,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
naive B-cells,0.000000,0.002835,0.000000,0.022392,0.004026,0.021188,0.000000,0.000000,0.000000,0.000870,0.000000,0.011545,0.003523,0.012027,0.015073,0.000000,0.017573,0.003951,0.000000,0.006758
pDC,0.000000,0.003228,0.000000,0.003777,0.002855,0.005337,0.002300,0.000907,0.000000,0.000000,0.000857,0.000000,0.002008,0.032735,0.017715,0.000000,0.000000,0.000000,0.000000,0.000000
