# CellphoneDB scoring protocol
**B-cell signaling example**

In [None]:
%%capture
# Optional one-off setup: uncomment the line below to (re)install CellphoneDB from the
# `scoring` branch of the ventolab repository. %%capture suppresses this cell's output.
# pip install --force-reinstall "git+https://github.com/ventolab/CellphoneDB.git@scoring"

### Load scanpy object

In [None]:
import os
import scanpy as sc

# Location of the input AnnData (.h5ad) file -- edit this to point at your own data.
# The path is resolved against the current user's home directory instead of
# hardcoding one particular user's home.
adata_path = os.path.expanduser('~/Downloads/kevin_bcells_stroma/adata_subset_Bcells_stroma.h5ad')
adata = sc.read(adata_path)
# Display the dimensions of the loaded object
adata.shape

In [None]:
import os
# The default version of CellphoneDB data is the latest one, but you can change it to a previous version
# at any point in this notebook (by re-setting the value of cpdb_version variable).
# Please note that the format of the database from version v4.1.0 is incompatible with that of previous
# versions, hence the lowest version number you may choose in this notebook is v4.1.0
cpdb_version = "v4.1.9"
# cpdb_dir will contain the *_input.csv and cellphonedb.zip files that you will download from https://github.com/ventolab/cellphonedb-data
# It is built from the current user's home directory (~/.cpdb/releases/<version>), so no user id
# needs to be edited by hand.
cpdb_dir = os.path.join(os.path.expanduser("~"), ".cpdb", "releases", cpdb_version)
# Full path of the database archive that the scoring functions read
cpdb_file_path = os.path.join(cpdb_dir, "cellphonedb.zip")

### Downsample cell types
This protocol is not memory-optimized, so you may want to downsample the cells (or request more memory).

In [None]:
# Name of column containing the cell type name
cell_type_col_name = 'cell.labels'
# Fraction of cells that you want to keep for each cell type
# Values in the interval (0, 1]; 1 keeps every cell
downsamp_percentage = 1
# Seed for the random sampling, so that re-running the notebook selects the same cells
downsamp_seed = 0
if not 0 < downsamp_percentage <= 1:
    raise ValueError("downsamp_percentage must be in the interval (0, 1]")
# Downsample each cell type to the specified fraction
adata_obs = adata.obs.groupby(cell_type_col_name).sample(frac = downsamp_percentage,
                                                         random_state = downsamp_seed)
# Subset the object to the sampled cells (rows follow the order of the sampled index)
adata = adata[list(adata_obs.index)]
adata

### Convert sparse normalized matrix to dense matrix
TODO: This should be optimized to use the sparse matrix rather than dense.

In [None]:
import pandas as pd
# adata.X may be a scipy sparse matrix or already a dense array; only sparse
# containers expose .toarray(), so a dense X is passed through unchanged.
# .toarray() yields a plain ndarray rather than the legacy np.matrix from .todense().
expr = adata.X
if hasattr(expr, "toarray"):
    expr = expr.toarray()
# Build a cells x genes frame, then transpose so that genes are in rows and cells are in columns
norm_matrix = pd.DataFrame(expr,
                           columns = list(adata.var.index),
                           index = list(adata.obs.index)).transpose()
# Per-cell annotations (includes the cell type column used by the scoring steps)
metadata = adata.obs
# Remove scanpy object (and the temporary dense array name) to save some memory
del adata, expr

### Apply functions to rank interactions

##### **Step 1**: Filter genes expressed in less than min_pct_cell of cells in a given cell type.

In [None]:
from cellphonedb.utils import scoring_utils

# Step 1: per cell type, filter genes using the min_pct_cell threshold
# (fraction of cells of that type in which the gene is expressed).
cpdb_f = scoring_utils.filter_genes_per_cell_type(
    matrix=norm_matrix,
    metadata=metadata,
    min_pct_cell=0.1,
    cell_column_name=cell_type_col_name,
)

##### **Step 2**: Calculate the gene's mean expression per cell type.

In [None]:
# Step 2: mean expression of each gene within each cell type,
# computed on the filtered matrix from Step 1.
cpdb_fm = scoring_utils.mean_expression_per_cell_type(
    matrix=cpdb_f,
    metadata=metadata,
    cell_column_name=cell_type_col_name,
)

##### **Step 3**: Calculate geometric expression mean per heteromer

In [None]:
# Step 3: geometric mean of expression per heteromer, using the complex
# definitions from the CellphoneDB database file.
cpdb_fmsh = scoring_utils.heteromer_geometric_expression_per_cell_type(
    matrix=cpdb_fm,
    cpdb_file_path=cpdb_file_path,
)

##### **Step 4**: Scale the gene's mean expression across cell types.

In [None]:
# Step 4: scale the Step 3 output across cell types, with upper_range set to 10.
cpdb_fms = scoring_utils.scale_expression(
    cpdb_fmsh,
    upper_range=10,
)

##### **Step 5**: Calculate the ligand-receptor score.

In [None]:
import time
t0 = time.time()
# Score ligand-receptor interactions on the SCALED matrix produced in Step 4 (cpdb_fms).
# Passing cpdb_fmsh here would silently skip the scaling step, leaving Step 4 unused.
cpdb_scoring = scoring_utils.score_product(matrix = cpdb_fms,
                                           cpdb_file_path = cpdb_file_path,
                                           threads = 4)
# Report how long the scoring took
print(time.time() - t0, "s wall time")
# For reference: ~251 seconds wall time with no parallelisation

### List all cell-pair comparisons
Results are stored as a dictionary of dataframes; each dataframe is named after the pair of cell types being analyzed for cell-cell communication. \
Beware: you will find `cell_A|cell_B` but not `cell_B|cell_A`. Each dataframe also contains the partners swapped, so that interactions can be compared in both directions.

In [None]:
# Show the names of the first ten cell-pair result tables
list(cpdb_scoring)[:10]

### Example of how to query results
Ordering results by the score

In [None]:
# Pick one cell-type pair and order its interactions from highest to lowest score
cell_pair = 'endosteal fibroblast|osteoclast'
example_table = cpdb_scoring[cell_pair].sort_values(by='Score', ascending=False)

In [None]:
# Display the 20 top-scoring interactions for the selected cell pair
example_table.head(n=20)

____