# Yada Deconvolution
---

- Run the following cells for deconvolution using Yada.

## 1 - Import Prerequisites.

In [None]:
#On Colab.
!pip install -q tslearn gseapy similaritymeasures
!git clone https://github.com/zurkin1/Yada.git
!mv Yada/* .

In [1]:
%load_ext autoreload
%autoreload 2

from IPython.display import FileLink, FileLinks
import pandas as pd
from yada import *

# Widen pandas' display limits so the wide result tables below are not truncated.
for option_name, option_value in (('display.max_columns', 500), ('display.width', 10000)):
    pd.set_option(option_name, option_value)

## 2 - Configure Input Files.

Example input files are in the ./data/ folder.


In [55]:
# Mixture file (CSV). Format: one column per mixture (mix1, mix2, ...), one row per gene name.
mix = './data/Challenge/mix-107019_RNASeq.csv'

# Reference matrix file. It should be normalized the same way as the mixture data.
pure = './data/Challenge/pure-107019_RNASeq.csv'

# True cell type proportions file; only used by the evaluation step (calc_corr) below.
labels = './data/Challenge/prop-107019_RNASeq.csv'

## 3 - Run Deconvolution.

In [17]:
# Preprocess the mixture and reference files, then run the deconvolution.
# The outputs of preprocess() are bound to new names (mix_df, pure_df) instead
# of overwriting the `mix` / `pure` path strings configured above, so this cell
# can be re-run without first re-running the configuration cell.
# NOTE(review): the exact types returned by preprocess() are not visible here;
# the *_df names assume DataFrames -- confirm against yada.preprocess.
mix_df, pure_df, gene_list_df = preprocess(mix, pure)
result = run_dtw_deconv(mix_df, pure_df, gene_list_df)
result

#Download Result.
#FileLink('data/results.csv')
#from google.colab import files
#files.download('data/results.csv')

Unnamed: 0,naive.B.cells,memory.B.cells,naive.CD4.T.cells,naive.CD8.T.cells,memory.CD8.T.cells,regulatory.T.cells,monocytes,NK.cells,myeloid.dendritic.cells,neutrophils
mix0,0.0,0.2411721,0.0,0.0,0.08721292,0.1654319,0.1497608,0.01821413,0.150361,0.187847
mix1,0.150872,0.04812451,3.475842e-11,0.0,0.0,0.1723071,0.1845105,0.1111955,0.324417,0.008573
mix2,0.066226,0.1685786,0.06363351,0.07111752,0.2308617,0.06838576,0.08508581,0.03104875,0.03348154,0.18158
mix3,0.307865,0.0,0.1230014,0.1449526,0.0,0.0,0.1824844,0.1204828,0.0,0.125045
mix4,0.115112,6.11855e-15,3.628799e-15,0.02099705,0.1270943,0.1298876,0.2718646,0.2826728,0.05131808,0.001053
mix5,0.316086,0.02227409,7.592709e-10,0.2095167,0.01612603,0.1427691,0.0,0.01334211,0.2040157,0.07587
mix6,0.076283,0.02577672,0.103109,6.54605e-14,0.06159943,1.079747e-12,0.3814416,0.311439,0.01065416,0.029698
mix7,0.274009,0.06965705,0.2306733,0.1430616,1.50248e-12,0.1279961,0.05270309,0.0,0.01124479,0.090655
mix8,0.059322,0.2217562,0.03113964,0.1509374,3.168889e-14,0.3336183,0.0,0.03325314,0.1400284,0.029945
mix9,0.134989,0.05041417,0.2051428,0.0939537,0.001083234,0.05958823,0.1674474,1.854487e-15,0.2171078,0.070273


## 4 - Evaluate Results.

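Run this step only if the true cell type proportions are available (the `labels` file configured in step 2). It compares the estimated proportions with the true ones, per cell type.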

In [57]:
# Compare the estimated proportions (`result`) with the true proportions file (`labels`).
# The displayed table has one row per cell type with Pearson, Spearman and P columns.
calc_corr(labels, result)

Unnamed: 0,cell type,Pearson,Spearman,P
0,naive.B.cells,0.884305,0.879699,3.21367e-07
1,memory.B.cells,0.899984,0.834338,4.810522e-06
2,naive.CD4.T.cells,0.810546,0.777109,5.555559e-05
3,naive.CD8.T.cells,0.582671,0.600904,0.005078524
4,memory.CD8.T.cells,0.845725,0.487551,0.02921517
5,regulatory.T.cells,0.655329,0.557895,0.01058503
6,monocytes,0.956189,0.919143,1.043339e-08
7,NK.cells,0.983158,0.930827,2.674426e-09
8,myeloid.dendritic.cells,0.890522,0.881203,2.886085e-07
9,neutrophils,0.970342,0.948872,1.88217e-10


## 5 - Benchmark.

In [113]:
# Build the benchmark table: one row per dataset, one column per metric, each
# value being the mean Spearman correlation for that (metric, dataset) pair.
data = pd.read_csv('./data/result.csv', index_col=0)
data.columns = ['metric', 'dataset', 'platform', 'celltype', 'pearson', 'spearman', 'p']
# Aggregate only the 'spearman' column. Calling .mean() on the whole grouped
# frame also tries to average the string columns ('platform', 'celltype'),
# which raises a TypeError on pandas >= 2.0.
data = (
    data.groupby(['metric', 'dataset'])['spearman'].mean()
    .reset_index()
    .pivot(index='dataset', columns='metric', values='spearman')
    .reset_index()
)

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


plt.rcParams["figure.figsize"] = [20,20]

# Load the benchmark results and reshape them to one row per dataset and one
# column per metric, holding the mean Spearman correlation.
df = pd.read_csv('./data/result.csv', index_col=0)
df.columns = ['metric', 'dataset', 'platform', 'celltype', 'pearson', 'spearman', 'p']
# Aggregate only 'spearman': averaging the whole grouped frame would also hit
# the string columns ('platform', 'celltype') and raise on pandas >= 2.0.
df = (
    df.groupby(['metric', 'dataset'])['spearman'].mean()
    .reset_index()
    .pivot(index='dataset', columns='metric', values='spearman')
    .reset_index()
)

# Initialize the figure style. The seaborn style sheets were renamed to
# 'seaborn-v0_8-*' in Matplotlib 3.6 and the old names were later removed, so
# use whichever name this Matplotlib version provides.
style_name = next(
    (name for name in ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid') if name in plt.style.available),
    'default',
)
plt.style.use(style_name)

# create a color palette
palette = plt.get_cmap('Set1')

# One panel per metric, three panels per row. Keep the original 3x3 layout for
# up to nine metrics and add rows beyond that instead of failing in plt.subplot.
metrics = [name for name in df.columns if name != 'dataset']
n_cols = 3
n_rows = max(3, -(-len(metrics) // n_cols))

# multiple line plot
for num, column in enumerate(metrics, start=1):
    # Order the datasets by this panel's metric (best first). Sorting into a
    # local keeps `df` itself unchanged between iterations.
    ranked = df.sort_values(column, ascending=False)

    # Find the right spot on the plot
    plt.subplot(n_rows, n_cols, num)

    # plot every metric as a faint grey background line
    for other in metrics:
        plt.plot(ranked['dataset'], ranked[other], marker='', color='grey', linewidth=0.6, alpha=0.3)

    # Highlight this panel's metric
    plt.plot(ranked['dataset'], ranked[column], marker='', color=palette(num), linewidth=2.4, alpha=0.9, label=column)

    # Same limits for every chart
    plt.xlim(0,10)
    plt.ylim(0,1)

    # Not ticks everywhere: x labels only on the last grid row, y labels only
    # in the first column. tick_params expects real booleans; the string 'off'
    # is truthy and does not hide anything on current Matplotlib.
    # NOTE(review): each panel has its own dataset order, so hiding the x labels
    # on upper rows leaves those panels' datasets unlabeled -- confirm intended.
    if num <= (n_rows - 1) * n_cols:
        plt.tick_params(labelbottom=False)
    if (num - 1) % n_cols != 0:
        plt.tick_params(labelleft=False)

    # Add title: metric name and its mean Spearman correlation over the datasets
    plt.title(f'{column} ({np.round(ranked[column].mean(), 2)})', loc='left', fontsize=16, fontweight=0, color=palette(num) )
    plt.xticks(rotation=45)

# general title
plt.suptitle("Spearman correlation of different metrics over ten datasets", fontsize=13, fontweight=0, color='black', style='italic', y=1.02)

# Show the graph
plt.show()