# Yada Deconvolution

---



- Run the following cells, in order, to perform deconvolution with Yada.

## 1 - Import prerequisites

In [None]:
from IPython.display import FileLink, FileLinks
import pandas as pd

pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 10000)

!pip install -q tslearn
!pip -q install gseapy
!git clone https://github.com/zurkin1/Yada.git
!mv Yada/* .

from yada import *

[?25l[K     |▍                               | 10 kB 18.5 MB/s eta 0:00:01[K     |▉                               | 20 kB 22.0 MB/s eta 0:00:01[K     |█▎                              | 30 kB 21.6 MB/s eta 0:00:01[K     |█▋                              | 40 kB 22.9 MB/s eta 0:00:01[K     |██                              | 51 kB 25.1 MB/s eta 0:00:01[K     |██▌                             | 61 kB 26.5 MB/s eta 0:00:01[K     |███                             | 71 kB 27.9 MB/s eta 0:00:01[K     |███▎                            | 81 kB 25.5 MB/s eta 0:00:01[K     |███▊                            | 92 kB 27.2 MB/s eta 0:00:01[K     |████▏                           | 102 kB 29.0 MB/s eta 0:00:01[K     |████▌                           | 112 kB 29.0 MB/s eta 0:00:01[K     |█████                           | 122 kB 29.0 MB/s eta 0:00:01[K     |█████▍                          | 133 kB 29.0 MB/s eta 0:00:01[K     |█████▉                          | 143 kB 29.0 MB/s eta 0:

## 2 - Configure Input Files.

Example input files are in the ./data/ folder.


In [None]:
# Mixture expression file (CSV): one column per mixture (mix1, mix2, ...), one row per gene name.
mix = './data/Abbas/mix.csv'

# Reference matrix file. It should be normalized the same way as the mixture data.
pure = './data/Abbas/pure.csv'

# True cell type proportions file, consumed by the evaluation step.
labels = './data/Abbas/labels.csv'

## 3 - Run deconvolution.

In [None]:
# Pick the deconvolution method by changing the function called below.
# Available methods: run_dtw_deconv (recommended for Microarray), cibersort, dsection,
# lasso, nnls_deconv_constrained, pxcell (recommended for RNASeq).
# NOTE(review): argument lists may differ between methods - confirm in yada before swapping.
result = run_dtw_deconv(mix, pure)
result

# Optional: download the result file (uncomment the variant matching your environment).
# Jupyter:
#FileLink('data/results.csv')
# Google Colab:
#from google.colab import files
#files.download('data/results.csv')

Deconvolution, num_cells: 4, num_mixes: 12
 99%

Unnamed: 0,Jurkat,IM-9,Raji,THP-1
0,0.58819,0.011821,0.098863,0.259721
1,0.572269,0.006847,0.101012,0.239512
2,0.643786,0.004184,0.123546,0.327937
3,0.176337,0.309265,0.427278,0.000176
4,0.180367,0.296176,0.544721,0.053164
5,0.178218,0.308456,0.50893,0.027433
6,0.028896,0.732686,0.009544,0.248162
7,0.026462,0.772282,0.002873,0.235702
8,0.027948,0.71241,0.020035,0.2668
9,0.002617,0.420784,0.289749,0.316305


## 4 - Evaluate Results.

Run this step only if the true cell type proportions are available (the `labels` file configured above).

In [None]:
# Compare the estimated proportions in `result` with the true proportions from the `labels` file.
# NOTE(review): judging by the printed output, each line holds a cell type, a correlation
# value (presumably Pearson - confirm in yada) and the Spearman result.
calc_corr(labels, result)

Jurkat, 0.9926177635639944, SpearmanrResult(correlation=0.9716254134469436, pvalue=1.3811738967574605e-07)
IM-9, 0.9893186560070362, SpearmanrResult(correlation=0.9716254134469436, pvalue=1.3811738967574605e-07)
Raji, 0.9855374539708254, SpearmanrResult(correlation=0.9716254134469436, pvalue=1.3811738967574605e-07)
THP-1, 0.9415739604902392, SpearmanrResult(correlation=0.6909336273400487, pvalue=0.012843447163603845)


## 5 - Another Example With RNASeq.

This example uses `pxcell`, xCell ported to Python. Reference:

Aran, Hu and Butte, xCell: digitally portraying the tissue cellular heterogeneity landscape. Genome Biology (2017) 18:220.

In [None]:
# RNASeq mixture file in the same layout as before: columns mix1, mix2, ..., rows gene names.
# It is bound to its own name so the `mix` path configured in section 2 is not overwritten;
# otherwise re-running the section 3 cell afterwards would silently pair this file with
# the Abbas reference matrix.
mix_rnaseq = './data/Challenge/input/mix-107019.csv'

pxcell(mix_rnaseq)

Number of samples: 20, number of gene sets: 489


Unnamed: 0_level_0,mix0,mix1,mix2,mix3,mix4,mix5,mix6,mix7,mix8,mix9,mix10,mix11,mix12,mix13,mix14,mix15,mix16,mix17,mix18,mix19
cell_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
Adipocytes,0.000000,0.002355,0.000000,0.000104,0.000000,0.000000,0.000003,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.003059,0.002760
Astrocytes,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
B-cells,0.060759,0.045040,0.053219,0.065533,0.000000,0.104229,0.000000,0.133439,0.080371,0.000947,0.111579,0.033880,0.000000,0.133060,0.112339,0.045254,0.058061,0.000000,0.000000,0.022430
Basophils,0.000000,0.000000,0.000000,0.025438,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.030487,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
CD4+ T-cells,0.000000,0.000000,0.064289,0.000000,0.000000,0.044092,0.000000,0.058988,0.047214,0.047096,0.000000,0.031886,0.000000,0.000000,0.008516,0.000024,0.102259,0.081592,0.001961,0.046897
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ly Endothelial cells,0.000524,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000192,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
mv Endothelial cells,0.000188,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
naive B-cells,0.000000,0.002835,0.000000,0.022392,0.004026,0.021188,0.000000,0.000000,0.000000,0.000870,0.000000,0.011545,0.003523,0.012027,0.015073,0.000000,0.017573,0.003951,0.000000,0.006758
pDC,0.000000,0.003228,0.000000,0.003777,0.002855,0.005337,0.002300,0.000907,0.000000,0.000000,0.000857,0.000000,0.002008,0.032735,0.017715,0.000000,0.000000,0.000000,0.000000,0.000000
