---
title: "CyTOF: read fcs files and test cofactors"
subtitle: "manual cofactor checking"
author: MKim
format:
  html:
    toc: true
    toc-depth: 3
    toc-title: Contents
    toc-location: left
    toc-float: true
    toc-collapsed: true
    html-math-method: katex
    embed-resources: true
    page-layout: full
    fig-dpi: 120
execute:
  echo: false
---

In [1]:
import bokeh
from bokeh.plotting import show

import flowkit as fk
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib_inline.backend_inline
import seaborn as sns

import os

# Configure bokeh so that figures passed to show() render inline in the notebook output.
bokeh.io.output_notebook()

In [2]:
# Create the folder that receives the saved cofactor figures.
# exist_ok=True makes the cell idempotent (safe to re-run) without a separate existence check.
os.makedirs('figures_for_cofactor', exist_ok=True)

In [3]:
# Directory holding the .fcs files to load (relative to the notebook's working directory).
path_files = "./data_cytof/"

# Read the FCS files from a directory

In [83]:
# Load the FCS files found under path_files as flowkit Sample objects;
# the bare name on the last line displays the loaded samples as the cell output.
samples_6 = fk.load_samples(path_files)
samples_6

[Sample(v3.0, Tet2_194_CD3_CD28.fcs, 71 channels, 30562 events),
 Sample(v3.0, Tet2_194_LPS.fcs, 71 channels, 33507 events),
 Sample(v3.0, Tet2_194_VEH.fcs, 71 channels, 14994 events),
 Sample(v3.0, Tet2_205_CD3_CD28.fcs, 71 channels, 23737 events),
 Sample(v3.0, Tet2_205_LPS.fcs, 71 channels, 28403 events),
 Sample(v3.0, Tet2_205_VEH.fcs, 71 channels, 18004 events)]

In [5]:
def augmented_df(df, sid):
    """Tag every row of ``df`` with the id of the sample it came from.

    Parameters
    ----------
    df : pandas.DataFrame
        Event table of one FCS sample.
    sid : object
        Sample identifier written into every row.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object (modified in place, not copied), now carrying
        an extra column keyed by the tuple ``('sample_id', 'sample_id')``.
    """
    sample_id_key = ('sample_id', 'sample_id')
    df[sample_id_key] = sid
    return df

In [6]:
# Convert each sample's events (source='raw') to a DataFrame tagged with its sample id.
sample_df_list=[augmented_df(sample.as_dataframe(source='raw'), sample.id) for sample in samples_6]
# Stack all samples row-wise. ignore_index=True gives the combined table a unique
# 0..N-1 row index; without it every sample restarts at 0 and the row labels are
# duplicated, which can break label-aligned operations downstream.
fcs_df=pd.concat(sample_df_list, axis=0, ignore_index=True)

In [34]:
# Relabel marker channels from '<tag>_<marker>' to '<marker>_<tag>'
# (e.g. '89Y_CD45' -> 'CD45_89Y', with any '-' removed) and remember where
# those channels sit so the unused channels can be dropped later.
col_orig=fcs_df.columns
col_orig_list=list(col_orig)

col_name_list2=[]  # column keys after relabelling, in the original order
marker_col=[]      # positional indices of the columns to keep
for ii, col in enumerate(col_orig_list):
    pns = col[1]
    # A marker channel's label starts with a digit and contains '_'.
    # pns[:1] (rather than pns[0]) keeps an empty label from raising IndexError.
    if pns[:1].isdigit() and '_' in pns:
        tag, marker_name = pns.split('_', 1)  # split on the first '_' only
        rename = (marker_name + '_' + tag).replace('-','')
        col_name_list2.append((col[0], rename))
        marker_col.append(ii)
    else:
        col_name_list2.append(col)

# Also keep the last column: it is the sample_id column added to each sample
# before concatenation. Computed from the length instead of relying on the
# loop variable still being in scope after the loop.
if col_orig_list:
    marker_col.append(len(col_orig_list) - 1)

In [10]:
# Rebuild the two-level (pnn, pns) header from the relabelled column keys.
pnn_labels = [pair[0] for pair in col_name_list2]
pns_labels = [pair[1] for pair in col_name_list2]
new_columns = pd.MultiIndex.from_arrays([pnn_labels, pns_labels], names=['pnn', 'pns'])

# Flatten the header to the 'pns' level only, so columns are addressed by a single label.
# NOTE: this replaces fcs_df's columns in place; the relabelling cell above expects
# tuple column keys and therefore cannot be re-run afterwards without rebuilding fcs_df.
new_columns_single = list(new_columns.get_level_values('pns'))
fcs_df.columns = new_columns_single

# All FCS data combined into a single DataFrame

In [47]:
# Preview the first rows of the combined table (columns carry the flattened labels).
fcs_df.head()

Unnamed: 0,Time,Event_length,CD45_89Y,102Pd,104Pd,105Pd,106Pd,108Pd,110Pd,HLADR_111Cd,...,208Pb,CD16_209Bi,Center,Offset,Width,Residual,beadDist,bc_separation_dist,mahalanobis_dist,sample_id
0,661.041992,22.0,412.64502,25.77763,840.156738,62.351097,621.207642,35.14933,663.321167,3.907504,...,0.0,2.712706,665.995483,63.749474,41.665462,99.399338,146.21434,0.427387,2.119524,Tet2_194_CD3_CD28.fcs
1,1606.848999,30.0,86.169205,19.263371,754.377319,63.770561,702.919067,34.960449,615.405823,17.8829,...,0.039206,0.57464,1014.872131,83.073563,74.469383,120.216301,84.530914,0.421493,2.356081,Tet2_194_CD3_CD28.fcs
2,2134.166992,27.0,235.820526,43.977959,682.236694,61.585442,506.093597,29.748413,420.823425,22.905087,...,0.107103,1.655213,1008.516785,116.71431,89.460983,135.712341,155.546814,0.357073,2.992274,Tet2_194_CD3_CD28.fcs
3,2726.041992,34.0,401.721039,17.634621,486.760681,45.908226,421.965057,32.385147,372.157776,18.211843,...,0.0,0.0,1193.339722,84.541939,87.02713,153.43071,123.058891,0.387981,1.092942,Tet2_194_CD3_CD28.fcs
4,3270.702881,26.0,340.457306,21.509212,665.266113,50.862789,549.097961,21.249895,520.45343,4.58299,...,2.780666,0.446503,871.395264,84.206436,65.056786,101.099045,139.366653,0.431778,2.244912,Tet2_194_CD3_CD28.fcs


# Trim channels that are not used (keep the marker channels and sample_id)

In [84]:
# Keep only the columns whose positions were collected in marker_col
# (the relabelled marker channels plus the trailing sample_id column).
fcs_df_trimmed = fcs_df.iloc[:,marker_col]
fcs_df_trimmed.head()

Unnamed: 0,CD45_89Y,HLADR_111Cd,CD3_112Cd,CD4_114Cd,CD8_116Cd,CD196_141Pr,CD19_142Nd,CD127_143Nd,CD38_144Nd,CD1c_145Nd,...,IgM_172Yb,CD184_173Yb,CD279_174Yb,TNFa_175Lu,CD56_176Yb,CD45_195Pt,CD45_196Pt,CD45_198Pt,CD16_209Bi,sample_id
0,412.64502,3.907504,78.869453,0.064539,45.691082,3.176548,0.0,3.065427,17.981371,1.267487,...,2.797745,5.964279,2.519733,0.423804,0.622348,2.881647,0.0,0.0,2.712706,Tet2_194_CD3_CD28.fcs
1,86.169205,17.8829,96.898453,0.0,36.989292,0.0,0.0,0.126138,26.494999,0.0,...,0.646155,0.039602,0.018124,84.315346,1.118604,3.115852,2.533583,0.0,0.57464,Tet2_194_CD3_CD28.fcs
2,235.820526,22.905087,35.147606,38.417339,0.718573,0.0,1.577704,2.337688,5.960651,0.0,...,0.0,0.667073,11.2427,0.941599,0.012648,1.672839,0.748688,0.0,1.655213,Tet2_194_CD3_CD28.fcs
3,401.721039,18.211843,168.43988,4.329476,181.127151,0.0,1.58956,3.125704,32.871078,0.0,...,7.390351,34.851997,2.578116,3.053176,0.0,0.0,0.111948,0.0,0.0,Tet2_194_CD3_CD28.fcs
4,340.457306,4.58299,103.852966,50.595531,0.0,0.0,0.0,9.44114,8.959432,0.0,...,0.66757,140.122437,0.0,4.926303,0.758325,0.010309,0.0,0.0,0.446503,Tet2_194_CD3_CD28.fcs


In [37]:
# (rows, columns) of the trimmed table: pooled events across samples x kept columns.
fcs_df_trimmed.shape

(149207, 44)

# Plot marker density plots with various cofactors

In [85]:
dataset_name='Tet2'

# Cofactors to compare. The report sections that follow embed the figures for
# 1.0 and 6.0, so both must be produced on a fresh run; add values here to
# widen the sweep.
cofactors = [1.0, 6.0]

plt.rcParams['font.size'] = 8 # Set default font size


def plot_cofactor_densities(df, cf, out_path):
    """Save a grid of per-marker density plots of arcsinh(x / cf)-transformed data.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose last column is 'sample_id' and whose other columns are
        the marker intensities to transform and plot.
    cf : float
        Cofactor: intensities are divided by it before arcsinh is applied.
    out_path : str
        File the finished figure is written to.
    """
    num_markers = df.shape[1] - 1  # every column except the trailing sample_id
    # Near-square grid with at least num_markers panels.
    num_col = int(np.ceil(np.sqrt(num_markers)))
    num_row = int(np.ceil(num_markers / num_col))

    # Transform a copy so the caller's table keeps the untransformed values.
    transformed = df.copy()
    transformed.iloc[:, 0:num_markers] = np.arcsinh(transformed.iloc[:, 0:num_markers] / cf)

    # squeeze=False keeps `axes` two-dimensional even for a single row or column.
    fig, axes = plt.subplots(num_row, num_col,
                             figsize=(2.0*num_col, 1.5*num_row), squeeze=False)
    panels = axes.ravel()  # row-major order: panel i sits at (i // num_col, i % num_col)

    for ax, marker in zip(panels, transformed.columns[:num_markers]):
        sns.kdeplot(data=transformed, x=marker, bw_adjust=.75, hue='sample_id',
                    legend=False, ax=ax)
        ax.set_title(f"cofactor {cf}", fontsize=10)

    # Hide the panels left over when the grid is larger than the marker count.
    for ax in panels[num_markers:]:
        ax.set_axis_off()

    fig.tight_layout()
    fig.savefig(out_path)
    # Close right away so figures do not accumulate in memory across cofactors.
    plt.close(fig)


for cf in cofactors:
    plot_cofactor_densities(fcs_df_trimmed, cf,
                            f'figures_for_cofactor/{dataset_name}_cofactor_{cf}.png')

## With a cofactor = 1.0
![](figures_for_cofactor/Tet2_cofactor_1.0.png)

## With a cofactor = 6.0
![](figures_for_cofactor/Tet2_cofactor_6.0.png)