In [1]:
%%capture output
# Environment setup: upgrade pip and install the third-party packages used below.
# The %%capture magic stores this cell's output in the variable `output`
# instead of displaying it.
!pip install --upgrade pip
!pip install --upgrade pandas
!pip install tables   
# necessary for pd.read_hdf()

# Notebook tooling (widgets / progress bars).
!pip install ipywidgets
!pip install --upgrade jupyter
!pip install IProgress
# Modelling, explanation and data-container libraries.
!pip install catboost
!pip install shap
!pip install anndata

In [2]:
import os
import random
import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold, GroupKFold
import scipy
import anndata as ad
import shap

--------------------------------------------------------------------------------

  CuPy may not function correctly because multiple CuPy packages are installed
  in your environment:

    cupy-cuda11x, cupy-cuda12x

  Follow these steps to resolve this issue:

    1. For all packages listed above, run the following command to remove all
       existing CuPy installations:

         $ pip uninstall <package_name>

      If you previously installed CuPy via conda, also run the following:

         $ conda uninstall cupy

    2. Install the appropriate CuPy package.
       Refer to the Installation Guide for detailed instructions.

         https://docs.cupy.dev/en/stable/install.html

--------------------------------------------------------------------------------



In [3]:
%matplotlib inline
from tqdm.notebook import tqdm
import gc
import pickle

import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.utils.data import Dataset, DataLoader

# Run on the GPU when one is available, otherwise on the CPU. A torch.device is
# built in both cases (the fallback used to be the plain string 'cpu'), so
# `device` has the same type on every machine.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Let pandas show up to 500 rows / columns before truncating rendered frames.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

## Data loading

In [4]:
# Root directory of the project data; every input, feature, model and output
# location below is derived from it.
lrz_path = '/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di93zoj/open-problems-multimodal-3rd-solution/'

# Alternative project location; none of the paths below are built from it.
model_path_for_now = '/dss/dsshome1/02/di93zoj/valentina/open-problems-multimodal-3rd-solution/'

# Most of these locations were previously addressed relative to the notebook
# ('../../../<sub-path>'); the same <sub-path> is now appended to lrz_path.
raw_path = f'{lrz_path}input/raw/'

# CITE: targets, features and trained models (MLP / CatBoost).
cite_target_path = f'{lrz_path}input/target/cite/'
cite_feature_path = f'{lrz_path}input/features/cite/'
cite_mlp_path = f'{lrz_path}model/cite/mlp/'
cite_cb_path = f'{lrz_path}model/cite/cb/'

# Multi: targets, features and trained models (MLP / CatBoost).
multi_target_path = f'{lrz_path}input/target/multi/'
multi_feature_path = f'{lrz_path}input/features/multi/'
multi_mlp_path = f'{lrz_path}model/multi/mlp/'
multi_cb_path = f'{lrz_path}model/multi/cb/'

# Saved row/column labels of the raw CITE inputs.
index_path = f'{lrz_path}input/preprocess/cite/'

output_path = f'{lrz_path}output/'

## Cite

In [5]:
# get model name
#mlp_model_path = os.listdir(cite_mlp_path)

markdown

In [None]:
# check model names and lists/dict/...

In [6]:
# Name fragments of the CITE MLP checkpoints to use. Each fragment is matched
# as a substring against the file names in `cite_mlp_path`, and the order of
# this list fixes the order of the matched names.
mlp_model_name = [
    'corr_add_con_imp', 'corr_last_v3',
    'corr_c_add_w2v_v1_mish_flg', 'corr_c_add_w2v_v1_flg',
    'corr_c_add_84_v1', 'corr_c_add_120_v1',
    'corr_w2v_cell_flg', 'corr_best_cell_120', 'corr_cluster_cell',
    'corr_w2v_128', 'corr_imp_w2v_128', 'corr_snorm',
    'corr_best_128', 'corr_best_64',
    'corr_cluster_128', 'corr_cluster_64',
    'corr_svd_128', 'corr_svd_64',
]

In [7]:
# Scan the checkpoint directory once (previously it was re-listed for every
# name fragment) and sort it: os.listdir() returns entries in arbitrary order,
# so sorting keeps the result reproducible between runs and machines.
cite_mlp_files = sorted(os.listdir(cite_mlp_path))

# For each fragment, in the order given by `mlp_model_name`, collect the file
# names that contain it.
# NOTE(review): matching is by substring, so a fragment can select several
# files (or none). `model_feat_dict` indexes this list by position and
# presumably relies on exactly one hit per fragment - confirm.
model_name_list = [
    file_name
    for name_fragment in mlp_model_name
    for file_name in cite_mlp_files
    if name_fragment in file_name
]

model_name_list

['cite_mlp_corr_add_con_imp_flg_donor_val_50',
 'cite_mlp_corr_last_v3_flg_donor_val_55',
 'cite_mlp_corr_c_add_w2v_v1_mish_flg_donor_val_66',
 'cite_mlp_corr_c_add_w2v_v1_flg_donor_val_66',
 'cite_mlp_corr_c_add_84_v1_flg_donor_val_47',
 'cite_mlp_corr_c_add_120_v1_flg_donor_val_63',
 'cite_mlp_corr_w2v_cell_flg_donor_val_51',
 'cite_mlp_corr_best_cell_120_flg_donor_val_51',
 'cite_mlp_corr_cluster_cell_flg_donor_val_64',
 'cite_mlp_corr_w2v_128_flg_donor_val_42',
 'cite_mlp_corr_imp_w2v_128_flg_donor_val_38',
 'cite_mlp_corr_snorm_flg_donor_val_39',
 'cite_mlp_corr_best_128_flg_donor_val_45',
 'cite_mlp_corr_best_64_flg_donor_val_50',
 'cite_mlp_corr_cluster_128_flg_donor_val_51',
 'cite_mlp_corr_cluster_64_flg_donor_val_57',
 'cite_mlp_corr_svd_128_flg_donor_val_30',
 'cite_mlp_corr_svd_64_flg_donor_val_38']

In [8]:
# Per-model weights; the same values appear as the second element of each
# model_feat_dict entry below, in the same order.
weight = [1, 0.3, 1, 1, 1, 1, 1, 1, 1, 0.8, 0.8, 0.8, 0.8, 0.5, 0.5, 0.5, 1, 1, 2, 2]
weight_sum = np.sum(np.array(weight))

# (test-feature file, weight) for model_name_list[0] ... model_name_list[17],
# listed in positional order.
_mlp_test_inputs = [
    ('X_test_add_con_imp.pickle', 1),
    ('X_test_last_v3.pickle', 0.3),
    ('X_test_c_add_w2v_v1.pickle', 1),
    ('X_test_c_add_w2v_v1.pickle', 1),
    ('X_test_c_add_84_v1.pickle', 1),
    ('X_test_c_add_v1.pickle', 1),

    ('X_test_feature_w2v_cell.pickle', 1),
    ('X_test_best_cell_128_120.pickle', 1),
    ('X_test_cluster_cell_128.pickle', 1),

    ('X_test_feature_w2v.pickle', 0.8),
    ('X_test_feature_imp_w2v.pickle', 0.8),
    ('X_test_feature_snorm.pickle', 0.8),
    ('X_test_best_128.pickle', 0.8),
    ('X_test_best_64.pickle', 0.5),
    ('X_test_cluster_128.pickle', 0.5),
    ('X_test_cluster_64.pickle', 0.5),
    ('X_test_svd_128.pickle', 1),
    ('X_test_svd_64.pickle', 1),
]

# Maps a model name to [test-feature file, weight]. Indexing model_name_list
# by position (rather than zipping) keeps the IndexError if fewer than 18
# model names were found.
model_feat_dict = {
    model_name_list[position]: [feature_file, model_weight]
    for position, (feature_file, model_weight) in enumerate(_mlp_test_inputs)
}

# Two extra entries keyed by a fixed name instead of a model_name_list item.
model_feat_dict['best_128'] = ['X_test_best_128.pickle', 2]
model_feat_dict['best_64'] = ['X_test_best_64.pickle', 2]

### cite model

In [9]:
def std(x):
    """Standardize every row of ``x`` to zero mean and unit standard deviation.

    ``x`` is converted with ``np.array``; mean and (population) standard
    deviation are taken along axis 1 and applied back as column vectors, so a
    two-dimensional input is expected.

    Returns the standardized array. A row whose standard deviation is zero is
    divided by zero and therefore yields non-finite values.
    """
    values = np.array(x)
    row_mean = values.mean(axis=1).reshape(-1, 1)
    row_std = values.std(axis=1).reshape(-1, 1)
    centered = values - row_mean
    return centered / row_std

In [10]:
class CiteDataset(Dataset):
    """Dataset that pairs the ``index``-th feature entry with its target.

    ``feature`` and ``target`` are stored as given and must both support
    ``[index]``; the dataset length is ``len(feature)``.
    """

    def __init__(self, feature, target):
        self.feature, self.target = feature, target

    def __len__(self):
        return len(self.feature)

    def __getitem__(self, index):
        # One sample as a dict with the model input under "X" and its label under "y".
        return {"X": self.feature[index], "y": self.target[index]}

In [11]:
class CiteDataset_test(Dataset):
    """Feature-only dataset (no targets), e.g. for running predictions.

    ``feature`` is stored as given and must support ``len()`` and ``[index]``.
    """

    def __init__(self, feature):
        self.feature = feature

    def __len__(self):
        return len(self.feature)

    def __getitem__(self, index):
        # One sample as a dict holding only the model input under "X".
        return {"X": self.feature[index]}

In [12]:
def partial_correlation_score_torch_faster(y_true, y_pred):
    """Row-wise Pearson correlation between ``y_true`` and ``y_pred``.

    Both tensors are reduced along dim 1, so the result holds one correlation
    value per row. Only differentiable tensor operations are used, so the
    result can be back-propagated through.

    A row with zero variance in either input produces a division by zero
    (non-finite result).
    """
    # Sample (n - 1) normalisation, taken from the width of y_true.
    dof = y_true.shape[1] - 1

    true_dev = y_true - y_true.mean(dim=1, keepdim=True)
    pred_dev = y_pred - y_pred.mean(dim=1, keepdim=True)

    covariance = (true_dev * pred_dev).sum(dim=1) / dof
    true_var = true_dev.pow(2).sum(dim=1) / dof
    pred_var = pred_dev.pow(2).sum(dim=1) / dof

    return covariance / torch.sqrt(true_var * pred_var)

def correl_loss(pred, tgt):
    """Negative mean row-wise correlation between ``pred`` and ``tgt``.

    Minimising this loss maximises the average per-row correlation computed by
    ``partial_correlation_score_torch_faster``.
    """
    row_correlations = partial_correlation_score_torch_faster(tgt, pred)
    return -row_correlations.mean()

In [13]:
class CiteModel(nn.Module):
    """MLP with three stacked blocks whose outputs are concatenated for the head.

    Each block is Linear -> Linear -> LayerNorm -> ReLU and narrows the
    representation (feature_num -> 128 -> 32 -> 8). The three block outputs
    (128 + 32 + 8 = 168 values) are concatenated and mapped to 140 outputs.

    Args:
        feature_num: number of input features per sample.
    """

    def __init__(self, feature_num):
        super(CiteModel, self).__init__()

        # Attribute names and layer order define the state_dict keys used by
        # saved checkpoints, so they must not change.
        self.layer_seq_256 = nn.Sequential(nn.Linear(feature_num, 256),
                                           nn.Linear(256, 128),
                                           nn.LayerNorm(128),
                                           nn.ReLU(),
                                           )
        self.layer_seq_64 = nn.Sequential(nn.Linear(128, 64),
                                          nn.Linear(64, 32),
                                          nn.LayerNorm(32),
                                          nn.ReLU(),
                                          )
        self.layer_seq_8 = nn.Sequential(nn.Linear(32, 16),
                                         nn.Linear(16, 8),
                                         nn.LayerNorm(8),
                                         nn.ReLU(),
                                         )

        self.head = nn.Linear(128 + 32 + 8, 140)

    def forward(self, X, y=None):
        """Compute the 140 outputs for a batch ``X``.

        Args:
            X: ``torch.Tensor`` or ``np.ndarray`` whose second dimension has
                ``feature_num`` entries.
            y: unused; accepted for call compatibility.

        Returns:
            A tensor for tensor input. For NumPy input, a detached
            ``np.ndarray`` (the array is cast to the weight dtype first).
        """
        # Use the device/dtype of the model's own weights instead of a
        # module-level global, so the forward pass also works when the model
        # lives on a different device than that global.
        reference = self.head.weight

        from_numpy = isinstance(X, np.ndarray)
        if from_numpy:
            # NumPy arrays are often float64; cast so they match the weights.
            X = torch.from_numpy(X).to(dtype=reference.dtype)
        X = X.to(reference.device)

        X_256 = self.layer_seq_256(X)
        X_64 = self.layer_seq_64(X_256)
        X_8 = self.layer_seq_8(X_64)

        # Concatenate the three representations along the feature dimension.
        X = torch.cat([X_256, X_64, X_8], dim=1)
        out = self.head(X)

        if from_numpy:
            out = out.detach().cpu().numpy()

        return out

In [14]:
class CiteModel_mish(nn.Module):
    """Three stacked Linear -> Linear -> LayerNorm -> Mish blocks plus a linear head.

    The blocks narrow the representation (feature_num -> 128 -> 32 -> 8); the
    head maps the concatenation of all three block outputs (168 values) to 140
    outputs.
    """

    def __init__(self, feature_num):
        super().__init__()

        def make_block(n_in, n_hidden, n_out):
            # Layers are created in the same order for every block, so the
            # state_dict keys stay '<block>.0', '<block>.1', '<block>.2'.
            return nn.Sequential(
                nn.Linear(n_in, n_hidden),
                nn.Linear(n_hidden, n_out),
                nn.LayerNorm(n_out),
                nn.Mish(),
            )

        self.layer_seq_256 = make_block(feature_num, 256, 128)
        self.layer_seq_64 = make_block(128, 64, 32)
        self.layer_seq_8 = make_block(32, 16, 8)

        self.head = nn.Linear(128 + 32 + 8, 140)

    def forward(self, X, y=None):
        """Return the 140 outputs for the batch ``X``; ``y`` is unused."""
        wide = self.layer_seq_256(X)
        mid = self.layer_seq_64(wide)
        narrow = self.layer_seq_8(mid)

        # Feed all three representations to the head, joined along dim 1.
        return self.head(torch.cat([wide, mid, narrow], dim=1))

In [15]:
def train_loop(model, optimizer, loader, epoch):
    """Train ``model`` for one pass over ``loader`` using the correlation loss.

    Each batch is a dict with the inputs under 'X' and the targets under 'y';
    both are moved to ``device`` before the forward pass. A progress bar
    labelled with ``epoch`` shows the loss of the latest batch.

    Returns the same ``model`` object after the updates.
    """
    model.train()
    optimizer.zero_grad()

    with tqdm(total=len(loader), unit="batch") as progress:
        progress.set_description(f"Epoch{epoch}")

        for batch in loader:
            features, targets = batch['X'].to(device), batch['y'].to(device)
            batch_loss = correl_loss(model(features), targets)

            # Standard step: clear old gradients, back-propagate, update.
            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

            progress.set_postfix({"loss": batch_loss.item()})
            progress.update(1)

    return model

In [16]:
def valid_loop(model, loader, y_val):
    """Predict on a validation loader and score the predictions.

    Args:
        model: network to evaluate; it is switched to eval mode.
        loader: iterable of batch dicts; only the 'X' entry is read. It is
            cast to float and moved to ``device``.
        y_val: validation targets, row-aligned with the concatenated
            predictions (the loader must therefore not shuffle).

    Returns:
        ``(predictions, cor)``: the predictions as a NumPy array and the mean
        row-wise correlation between ``y_val`` and the predictions as a float.
    """
    model.eval()
    batch_preds = []

    # No gradients are needed anywhere in validation, so disable them once.
    with torch.no_grad():
        for d in loader:
            val_X = d['X'].to(device).float()
            # The batch targets are not used here: scoring is done against
            # y_val below, so they are no longer copied to the device.
            batch_preds.append(model(val_X))

        # Concatenate once and reuse for both the score and the return value.
        oof_pred = torch.cat(batch_preds)
        y_true = torch.tensor(y_val).to(device)
        cor = partial_correlation_score_torch_faster(y_true, oof_pred).mean().item()

    logits = oof_pred.detach().cpu().numpy()

    return logits, cor

In [17]:
def test_loop(model, loader):
    """Run ``model`` over ``loader`` and return all predictions as one array.

    The model is put in eval mode; each batch dict's 'X' entry is moved to
    ``device`` and predicted without gradient tracking. The per-batch outputs
    are concatenated in loader order.
    """
    model.eval()

    def predict_batch(batch):
        with torch.no_grad():
            features = batch['X'].to(device)
            return model(features).detach().cpu().numpy()

    return np.concatenate([predict_batch(batch) for batch in tqdm(loader)])

### Prediction

In [21]:
# model #16: cite_mlp_corr_svd_128_flg_donor_val_30
i = 'cite_mlp_corr_svd_128_flg_donor_val_30'

# model_feat_dict[i] is [test-feature file, weight]; only the file is needed here.
test_file = model_feat_dict[i][0]
# test_weight = model_feat_dict[i][1]

# NOTE(review): pd.read_pickle and torch.load both unpickle data and can run
# arbitrary code - only point them at trusted files.
X_test = pd.read_pickle(cite_feature_path + test_file)
X_test = np.array(X_test)
feature_dims = X_test.shape[1]

test_ds = CiteDataset_test(X_test)
# shuffle=False / drop_last=False keep one prediction per test row, in order.
test_dataloader = DataLoader(test_ds, batch_size=128, pin_memory=True,
                             shuffle=False, drop_last=False, num_workers=4)

# The checkpoint name decides which architecture the weights belong to.
if 'mish' in i:
    model = CiteModel_mish(feature_dims)
else:
    model = CiteModel(feature_dims)

model = model.to(device)
# map_location remaps the stored tensors onto the current device, so a
# checkpoint saved on a GPU can also be loaded on a CPU-only machine.
model.load_state_dict(torch.load(f'{cite_mlp_path}/{i}', map_location=device))

# pred_16 is created directly from the predictions; the former np.zeros
# preallocation was overwritten here and has been dropped.
pred_16 = test_loop(model, test_dataloader).astype(np.float32)

torch.cuda.empty_cache()

pd.DataFrame(pred_16)   # double check train_cite_targets.h5  -> omnipath

  0%|          | 0/377 [00:00<?, ?it/s]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139
0,-1.066433,-1.500550,-1.354774,1.106190,0.869803,3.481040,5.067837,-1.570396,-1.444474,-1.320852,-1.612052,-1.534098,-1.428519,-1.496802,3.095008,-1.384286,2.648872,1.938160,-0.093022,-1.635664,-1.428120,-0.072564,-1.464633,-1.333522,4.528148,-1.577123,-1.570215,-1.564498,-1.633450,-1.551511,-1.606082,-1.488960,-1.542044,-1.542291,-1.515924,-1.584970,-1.483199,6.078822,-1.523234,-1.523154,-1.628240,-1.568818,-1.554284,-1.576391,-1.607019,-1.563618,-1.300677,-1.282699,4.529342,-1.497951,-1.504462,-1.616715,-1.604992,-1.590265,-0.913128,-0.373880,-1.571010,1.285669,-1.561719,-1.562544,-1.531397,-1.546362,-1.493735,-1.614805,-1.581547,-1.436415,-1.352806,-1.433268,0.004768,-1.507400,-1.582382,-1.650515,-1.553527,5.308838,-1.614917,2.937888,-1.565187,-0.132013,-1.512913,-1.525913,-0.179205,-1.617044,-1.141805,-1.497630,-1.589817,-1.619898,-1.517949,-1.527373,-1.324593,-1.536239,-1.349147,-1.607809,-1.596236,-1.449520,-0.069496,-1.395786,-1.459246,2.709844,-1.504532,2.281004,1.127239,-1.582563,1.390550,-1.485366,-0.814275,-1.483867,1.818746,-1.166362,3.551317,-1.324322,-1.333104,-0.007823,-1.370018,-1.791438,-1.608289,-0.100262,-0.915906,-1.476855,-1.569585,4.021511,-1.475899,0.135406,-1.601376,-1.499214,-1.508077,-1.504729,-1.617144,-1.285858,-1.497512,-1.602304,-1.585749,2.483784,-1.404202,-1.656756,-1.628249,-1.330432,-1.008113,-1.255932,-0.622936,-0.130905
1,-1.067110,-1.506955,-1.399289,1.075526,0.997081,3.506622,5.481764,-1.564466,-1.460833,-1.301538,-1.630497,-1.572419,-1.490347,-1.531179,3.168335,-1.427267,2.470253,2.130677,-0.441134,-1.637434,-1.472109,-0.058242,-1.472134,-1.346540,4.738451,-1.580705,-1.617835,-1.605957,-1.660571,-1.576319,-1.645333,-1.515913,-1.583853,-1.545092,-1.542181,-1.602096,-1.507277,6.134613,-1.544582,-1.598276,-1.658122,-1.569817,-1.591216,-1.618025,-1.622394,-1.573258,-1.365324,-1.374641,4.661467,-1.514943,-1.525232,-1.650555,-1.612984,-1.614313,-1.050634,-0.368892,-1.596666,0.777737,-1.589263,-1.581116,-1.578929,-1.556408,-1.516185,-1.611275,-1.630935,-1.470598,-1.411295,-1.485477,-0.118224,-1.505585,-1.588891,-1.651967,-1.588987,5.179552,-1.639723,2.675957,-1.584337,-0.139389,-1.552935,-1.552968,-0.562470,-1.613981,-1.148527,-1.525604,-1.601094,-1.632247,-1.561806,-1.547703,-1.396585,-1.549064,-1.390192,-1.631668,-1.603218,-1.467851,-0.253814,-1.409238,-1.494805,2.745513,-1.549431,2.249057,0.980224,-1.594142,1.342944,-1.517599,-0.857558,-1.512639,1.533982,-1.339978,3.737477,-1.391075,-1.371431,-0.343284,-1.380614,-1.574345,-1.623899,-0.251694,-1.224326,-1.529244,-1.581402,4.151693,-1.492653,0.829439,-1.611794,-1.553943,-1.531965,-1.514008,-1.643932,-1.266751,-1.511899,-1.586327,-1.620505,2.777343,-1.404117,-1.657573,-1.687846,-1.422966,-0.780818,-1.299758,-0.628505,-0.121618
2,-1.010656,-1.271471,-1.207577,2.074188,1.614956,2.007590,6.589465,-1.374983,-1.260541,-1.450437,-1.464440,-1.332187,-1.378543,-1.357404,4.944262,-1.135714,0.924946,1.518677,-0.864071,-1.520523,-1.308277,0.001427,-1.352170,-1.241024,4.492778,-1.413115,-1.387910,-1.474926,-1.551057,-1.376167,-1.415526,-1.233463,-1.391570,-1.319213,-1.332545,-1.431528,-1.287814,4.748064,-1.345603,-1.431643,-1.396456,-1.397756,-1.360939,-1.085002,-1.420398,-1.334369,-1.064852,-1.288315,1.083776,-1.353123,-1.357820,-1.394112,-1.357486,-1.375918,-0.872777,-0.296593,-1.410997,0.365104,-1.434169,-1.399326,-0.980195,-1.282730,-1.286808,-1.391266,-1.348446,-1.279712,-1.213837,-1.296896,0.074692,-1.323143,-1.357035,-1.430854,-1.481354,2.077808,-1.360545,1.681203,-1.313775,-0.382094,-1.374308,-1.361059,-0.844092,-1.395442,-0.838407,-1.282310,-1.410564,-1.426378,-1.283322,-1.338274,-1.084535,-1.265670,-1.212536,-1.412977,-1.383991,-1.244097,-0.473081,-1.232863,-1.325556,3.463119,-1.337604,1.054756,0.780954,-1.317721,0.294678,-1.289280,0.886482,-1.307220,0.572258,-1.419703,4.713304,-1.237224,-0.932396,-0.971037,-1.191551,-1.494644,-1.460637,-0.819698,-1.329453,-1.379337,-1.115797,6.046976,-1.341719,-0.365858,-1.406861,-1.332324,-1.328038,-1.278196,-1.444028,-0.842345,-1.292369,-1.377934,-1.411724,4.658179,-1.187620,-1.412397,-1.381912,-1.340690,-0.045909,-0.986209,-0.127050,0.551833
3,-1.506548,-1.336604,-1.056332,0.852848,0.946436,0.652196,-1.553841,-1.178436,-1.245057,-1.485463,-1.608860,-1.321451,-1.367749,-1.370948,2.458670,-1.584513,0.834877,-1.507935,-0.873383,-0.723710,-1.385123,0.240378,-1.443920,-1.212697,1.991319,-1.421182,-1.507708,-1.561859,-1.521218,-1.395193,-1.488135,-1.296090,-1.493750,-1.405075,-1.416593,-1.498928,-1.087282,3.490089,-1.227964,-1.365816,-1.435709,-1.315796,-1.464000,3.674975,-1.643728,-1.410159,-0.942491,-1.462532,-1.112371,-1.440496,-1.249769,-1.447153,-1.303398,-1.550373,-0.496860,-0.919247,-1.332756,0.543380,-1.399913,-1.489305,-1.515259,-1.321918,-1.179004,-1.354169,-1.577725,-1.150897,-0.821295,-1.004911,0.275656,-1.535820,-1.410364,-1.556868,-1.379010,-0.568797,-1.571076,0.980073,-1.343998,-0.958824,-1.465775,-1.510480,-1.546694,-1.591890,-1.100110,-1.191252,-1.564873,-1.513613,-1.313196,-1.464063,-0.433923,5.615067,-1.107173,-1.529869,-1.416412,-0.954167,-1.068968,-0.864440,-1.288151,2.639074,-1.425212,0.542125,-0.083842,-1.457550,-0.933656,-1.168870,-0.069508,-1.299593,0.668483,-1.503244,5.709743,-0.631934,-0.421403,0.623684,-1.248126,5.926374,-1.381993,-1.438369,-0.589854,-1.473056,-1.499701,4.161409,-1.346117,0.627012,-1.429347,-1.443592,-1.194565,-1.167380,-1.454221,-1.252114,-1.365307,-1.422516,-1.511633,2.322751,-1.055404,-1.551430,-1.561118,-1.148535,1.396236,-1.462253,1.298150,-0.195608
4,-1.272211,-1.349656,-1.142900,1.304754,1.531233,2.833454,0.443823,-1.292403,-1.201001,-1.091781,-1.517518,-1.415199,-1.428606,-0.868173,1.900248,-1.324359,2.231340,-0.143202,0.213205,-1.557858,-1.172046,0.056330,-1.476984,-1.390703,6.038998,-1.469662,-1.585841,-1.370464,-1.549025,-1.187406,-1.345771,-1.325353,-1.432344,-1.403401,-1.415244,-1.239696,-1.291485,6.666256,-1.342346,-1.315730,-1.336252,-1.357748,-1.478511,-1.048786,-1.505300,-1.276843,-1.265218,-1.016437,1.965979,-1.414406,-1.268929,-1.416232,-1.383265,-1.311840,-0.663503,-0.906074,-1.376893,1.893142,-1.122602,-1.450719,-1.557523,-1.216956,-1.254956,-1.319030,-1.158824,-1.396057,-1.133276,-1.197061,0.001924,-1.165662,-1.243812,-1.259339,-1.361103,2.339784,-1.410420,5.317878,-1.467481,-0.898827,-1.228043,-1.517962,-0.371397,-1.558732,-1.136801,-1.337249,-1.489541,-1.539012,-1.330424,-1.369341,-1.201356,-1.355071,-1.183652,-1.392900,-1.258304,-1.235626,-0.058311,-1.402157,-1.302500,2.591898,-1.350134,2.151902,0.659183,-1.226342,-0.134884,-1.314342,-0.062312,-1.383810,1.998046,-1.083215,5.274286,-1.335720,-0.998889,0.389742,-1.332185,-1.411472,-1.522574,-0.068010,-0.730114,-1.435085,-1.509614,3.042988,-1.247208,1.639698,-1.482613,-1.424782,-1.297671,-1.329230,-1.468054,-1.295433,-1.301610,-1.158600,-1.345016,1.609707,-1.278629,-1.350011,-1.648516,-1.214390,0.288555,-1.344913,-0.039439,-0.498315
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48198,-0.520571,-1.446298,-1.333212,1.226779,1.118786,2.732154,4.920837,-1.530176,-1.416398,-1.343555,-1.581935,-1.536683,-1.521394,-1.430096,3.644986,-1.375152,2.543882,1.617501,-0.595053,-1.600122,-1.383155,-0.133517,-1.428545,-1.162127,4.858096,-1.533847,-1.576231,-1.550021,-1.658090,-1.462695,-1.532652,-1.415353,-1.474376,-1.446025,-1.464971,-1.509611,-1.405783,6.258062,-1.421973,-1.581838,-1.581651,-1.484735,-1.554419,-1.375611,-1.549044,-1.452298,-1.373945,-1.372723,3.570590,-1.539043,-1.419432,-1.594808,-1.276718,-1.570043,-0.990719,-0.325647,-1.555443,1.068727,-1.565544,-1.510546,-1.501571,-1.418419,-1.417387,-1.439717,-1.594535,-1.389467,-1.329167,-1.244673,-0.114199,-1.454957,-1.452832,-1.552856,-1.558651,5.353963,-1.440392,2.935039,-1.530277,-0.210803,-1.505090,-1.550401,-0.529867,-1.509280,-1.073027,-1.395025,-1.504454,-1.536773,-1.430418,-1.427118,-1.187014,-1.216357,-1.330071,-1.470005,-1.471741,-1.408938,-0.099801,-1.418980,-1.450342,2.793012,-1.476687,1.613821,1.229624,-1.589362,1.143333,-1.449506,-1.191991,-1.405894,1.951454,-1.743502,1.936536,-1.215991,-1.164546,-0.285573,-1.274349,-1.134150,-1.551967,-0.011424,-1.367180,-1.479140,-1.512057,3.459368,-1.417311,0.508917,-1.550602,-1.477608,-1.422782,-1.468938,-1.512276,-1.151931,-1.383198,-1.336235,-1.507962,2.403565,-1.350148,-1.524354,-1.654466,-1.411261,-0.456083,-1.010118,-0.709112,0.126238
48199,-0.641409,-1.391113,-1.307034,0.296210,0.626469,2.656102,2.535754,-1.514254,-1.408760,-1.089353,-1.525036,-1.494595,-1.320098,-1.396001,2.516135,-1.145856,1.904267,2.167556,0.087699,-1.627212,-1.416891,-0.120703,-1.433683,-0.953134,4.334615,-1.470356,-1.526713,-1.423867,-1.640714,-1.486353,-1.539591,-1.415365,-1.455074,-1.397985,-1.440594,-1.489195,-1.399510,5.994453,-1.415126,-1.517146,-1.605343,-1.420984,-1.532150,-0.847823,-1.529753,-1.446140,-1.307164,-1.310757,3.240212,-1.465302,-1.461950,-1.571509,-1.501429,-1.552469,-1.082406,-0.315041,-1.445862,0.114723,-1.554310,-1.473060,-1.441645,-1.489591,-1.414158,-1.481999,-1.561245,-1.361509,-1.247181,-1.291039,-0.247193,-1.464070,-1.501639,-1.575953,-1.473647,6.777804,-1.493657,2.840919,-1.499824,0.267264,-1.491480,-1.483413,0.361159,-1.455126,-1.187746,-1.366947,-1.504428,-1.518695,-1.447815,-1.426610,-1.162873,-1.307931,-1.320323,-1.528025,-1.495235,-1.391767,0.029807,-1.358215,-1.358588,2.154989,-1.435378,1.525113,0.341343,-1.589713,2.042199,-1.397458,-1.014115,-1.376949,2.262401,-1.130255,1.718631,-1.136366,-1.167024,-0.273883,-1.245803,-1.164059,-1.487741,0.829643,-1.263924,-1.461942,-1.498188,2.350487,-1.394380,2.042990,-1.484608,-1.473071,-1.359931,-1.448089,-1.519321,-1.383008,-1.377266,-1.379579,-1.531409,1.995489,-1.341270,-1.576226,-1.658416,-1.425811,-0.163358,-0.952007,-0.691250,0.277180
48200,-1.383966,-0.817277,-1.052890,0.099039,0.724624,1.477175,-1.276211,-0.455357,-1.187938,-1.269387,-1.238588,-1.340240,-1.310978,-1.315726,-0.761487,-1.423979,1.071221,-1.156683,-1.209055,-1.126398,-1.265997,2.207539,-1.148840,-1.129911,-0.614602,-1.170780,-1.252894,-1.292672,-1.347790,-1.206976,-1.260608,-1.263878,-1.281240,-1.209805,-1.243247,-1.221713,-1.325791,5.073211,-1.184845,-1.270493,-1.334181,-1.342831,-1.257830,0.158339,-1.376684,-1.177355,-1.268132,-0.626399,-1.586047,-1.309905,-1.241526,-1.248161,-1.315344,-1.483741,-1.000205,-0.765076,-1.187650,-1.294591,-1.408308,-1.238622,-1.287448,-1.196338,-1.168577,-1.202483,-1.292682,-1.273695,-1.274419,-1.218796,-0.462121,-1.299479,-1.209044,-1.374536,-1.255920,-0.968433,-1.264982,-0.375315,-1.259396,-1.161867,-1.266183,0.257565,-1.020998,0.115923,-1.238692,-1.116885,-1.297423,-1.320278,-1.257922,-1.219035,-0.434616,6.594982,-1.247654,-1.325174,-1.203815,-1.097859,-1.056196,-1.194640,-0.769394,1.713620,-1.226898,3.200755,-0.133794,-1.244782,-1.224115,-1.243201,-1.333687,-1.323710,0.489745,-1.378499,-0.525212,-1.196712,-1.282144,0.167660,-1.268242,7.981814,-1.303805,-1.367204,-1.138607,-1.301025,-1.201215,0.169105,-1.303702,6.219264,-1.236934,-1.161992,-1.199953,-1.085086,-1.359929,-1.125990,-1.297160,-0.929627,-1.390297,-0.619302,-1.336130,-1.327181,-1.333064,-1.075237,-0.313562,-1.231197,-1.183957,-0.367714
48201,-1.510795,-1.396045,-0.933571,0.539321,-0.054786,1.035743,-1.380013,-1.157287,-1.002030,-1.549959,-1.639010,-1.197559,-1.376003,-1.238775,1.158366,-1.503967,0.408375,-1.166183,-1.076883,-1.127724,-1.130991,1.260304,-1.625534,-1.113135,2.026301,-1.382103,-1.562430,-1.652046,-1.295157,-1.338008,-1.418059,-1.033716,-1.419218,-1.339538,-1.370080,-1.448213,-0.729094,1.441759,-0.912108,-1.130914,-1.390924,-1.099978,-1.375461,0.692936,-1.759153,-1.409878,-0.493102,-1.637192,-1.015282,-1.413356,-0.953711,-1.382503,-1.490589,-1.577617,-0.325730,-0.816219,-1.284719,0.096220,-1.376390,-1.323769,-1.592044,-1.109473,-0.894841,-1.171826,-1.580167,-0.916017,-0.181376,-0.887661,0.928777,-1.626286,-1.372843,-1.580970,-1.301643,2.149900,-1.626201,2.530985,-1.206650,1.348080,-1.450577,-1.496150,-1.486104,-1.699624,-1.192985,-1.010591,-1.609609,-1.565464,-1.156507,-1.473260,0.238628,2.060130,-0.690448,-1.452796,-1.320005,-0.821463,-1.285458,-1.198348,-0.985943,3.128843,-1.295043,0.390853,-0.475541,-1.442610,0.047102,-0.920430,0.993689,-1.159104,1.587487,-1.466593,6.614687,0.201702,0.561949,-0.303488,-1.112615,4.803817,-1.278976,-1.168382,-0.575475,-1.418272,-1.511029,4.777567,-1.170737,-0.611662,-1.301926,-1.316312,-0.868718,-0.955986,-1.351070,-1.079908,-1.246038,-1.289653,-1.488258,2.525875,-0.688867,-1.577495,-1.551894,-1.134680,3.389776,-1.486254,2.734019,0.597246


### Add cell IDs to the train and test data
### SHAP

In [None]:
# Saved row/column labels of the raw CITE train and test inputs.
# NOTE(review): allow_pickle=True unpickles object arrays - trusted files only.
train_ids = np.load(index_path + "train_cite_raw_inputs_idxcol.npz", allow_pickle=True)
test_ids = np.load(index_path + "test_cite_raw_inputs_idxcol.npz", allow_pickle=True)

train_index, train_column = train_ids["index"], train_ids["columns"]
test_index = test_ids["index"]

# Number of train rows, then number of test rows (one per line).
print(len(train_index), len(test_index), sep="\n")

# Training features, relabelled so each row is addressed by its train index entry.
# = X_svd_128 in make-features second to last cell
X_train_cell_ids = pd.read_pickle(cite_feature_path + 'X_svd_128.pickle').set_axis(train_index, axis=0)
X_train_cell_ids

70988
48203


In [33]:
# cell type from metadata
X_test_cell_ids = pd.read_pickle(cite_feature_path  + test_file)
X_test_cell_ids.index = test_index
X_test_cell_ids

Unnamed: 0,base_svd_0,base_svd_1,base_svd_2,base_svd_3,base_svd_4,base_svd_5,base_svd_6,base_svd_7,base_svd_8,base_svd_9,base_svd_10,base_svd_11,base_svd_12,base_svd_13,base_svd_14,base_svd_15,base_svd_16,base_svd_17,base_svd_18,base_svd_19,base_svd_20,base_svd_21,base_svd_22,base_svd_23,base_svd_24,base_svd_25,base_svd_26,base_svd_27,base_svd_28,base_svd_29,base_svd_30,base_svd_31,base_svd_32,base_svd_33,base_svd_34,base_svd_35,base_svd_36,base_svd_37,base_svd_38,base_svd_39,base_svd_40,base_svd_41,base_svd_42,base_svd_43,base_svd_44,base_svd_45,base_svd_46,base_svd_47,base_svd_48,base_svd_49,base_svd_50,base_svd_51,base_svd_52,base_svd_53,base_svd_54,base_svd_55,base_svd_56,base_svd_57,base_svd_58,base_svd_59,base_svd_60,base_svd_61,base_svd_62,base_svd_63,base_svd_64,base_svd_65,base_svd_66,base_svd_67,base_svd_68,base_svd_69,base_svd_70,base_svd_71,base_svd_72,base_svd_73,base_svd_74,base_svd_75,base_svd_76,base_svd_77,base_svd_78,base_svd_79,base_svd_80,base_svd_81,base_svd_82,base_svd_83,base_svd_84,base_svd_85,base_svd_86,base_svd_87,base_svd_88,base_svd_89,base_svd_90,base_svd_91,base_svd_92,base_svd_93,base_svd_94,base_svd_95,base_svd_96,base_svd_97,base_svd_98,base_svd_99,base_svd_100,base_svd_101,base_svd_102,base_svd_103,base_svd_104,base_svd_105,base_svd_106,base_svd_107,base_svd_108,base_svd_109,base_svd_110,base_svd_111,base_svd_112,base_svd_113,base_svd_114,base_svd_115,base_svd_116,base_svd_117,base_svd_118,base_svd_119,base_svd_120,base_svd_121,base_svd_122,base_svd_123,base_svd_124,base_svd_125,base_svd_126,base_svd_127,imp_0,imp_1,imp_2,imp_3,imp_4,imp_5,imp_6,imp_7,imp_8,imp_9,imp_10,imp_11,imp_12,imp_13,imp_14,imp_15,imp_16,imp_17,imp_18,imp_19,imp_20,imp_21,imp_22,imp_23,imp_24,imp_25,imp_26,imp_27,imp_28,imp_29,imp_30,imp_31,imp_32,imp_33,imp_34,imp_35,imp_36,imp_37,imp_38,imp_39,imp_40,imp_41,imp_42,imp_43,imp_44,imp_45,imp_46,imp_47,imp_48,imp_49,imp_50,imp_51,imp_52,imp_53,imp_54,imp_55,imp_56,imp_57,imp_58,imp_59,imp_60,imp_61,imp_62,imp_
63,imp_64,imp_65,imp_66,imp_67,imp_68,imp_69,imp_70,imp_71,imp_72,imp_73,imp_74,imp_75,imp_76,imp_77,imp_78,imp_79,imp_80,imp_81,imp_82,imp_83
83d6659a6a32,94.056297,-7.824483,0.390148,-11.240480,1.049213,-6.360567,-0.291657,-0.807333,1.523107,4.391570,1.058334,1.107465,2.794737,0.232357,0.485993,0.610800,1.087533,-3.675157,-2.152177,0.258529,-0.241675,-0.281301,-0.892517,3.144524,-1.095826,0.484069,1.941438,-2.086771,0.126985,-0.265173,1.782843,0.401962,-0.815250,-1.178362,0.804193,-0.375622,1.732931,0.167944,-0.211067,0.322198,-0.055706,0.074955,-0.984684,0.062712,0.836429,-0.737014,-0.763474,-0.775506,-0.968059,0.333593,-0.577777,-0.923708,0.634263,0.665948,-0.189621,0.493161,-0.988103,-0.436438,0.022296,0.698116,-0.026895,-0.644371,0.149706,0.742314,0.001074,-0.240200,0.196376,-0.087378,-0.114259,0.454667,-1.015756,-0.507654,-0.075879,0.975734,0.691606,-0.203795,-0.039084,-0.425149,-0.519585,0.167085,-0.137737,-1.078830,0.527103,-0.183130,0.473650,-0.550963,-0.575732,-0.393081,1.038305,-0.292080,-0.802224,0.014176,0.436137,-0.697561,0.277782,-0.157537,-0.256823,-0.159989,0.244960,-0.068922,-0.413599,0.258720,0.399672,-0.208129,0.502469,0.384433,-0.383426,-0.506806,-0.165553,0.475413,0.240959,0.584796,-0.482804,0.413303,-0.030615,-0.264916,-0.404241,-0.620478,0.027995,-1.228871,-0.297079,-0.250295,0.024182,0.137247,-0.458344,-0.103292,-0.283405,-0.316505,0.000000,0.508287,0.000000,1.094377,0.000000,0.000000,1.294662,4.059924,2.392116,1.461449,0.0,0.508287,0.0,0.0,0.0,0.0,0.0,0.0,1.294662,1.294662,0.000000,0.000000,0.0,2.031354,1.461449,0.508287,1.940462,0.000000,0.0,2.559081,0.000000,0.000000,0.00000,0.000000,1.940462,0.000000,0.000000,0.000000,1.094377,2.865681,0.508287,1.461449,0.000000,3.337905,1.604354,2.559081,3.604731,2.609067,2.745583,2.450923,2.392116,2.114670,1.094377,0.508287,2.031354,1.840474,1.094377,0.000000,0.508287,0.000000,0.000000,5.809778,0.000000,2.191574,0.000000,0.0,3.449107,0.000000,4.843113,4.489811,4.320063,4.511869,1.094377,1.461449,1.294662,1.604354,0.000000,1.294662,1.294662,2.114670,1.461449,4.198429,4.615403,3.129412
d98594f13d2e,92.367874,-4.847517,0.514445,-8.376771,-1.919603,-4.256521,-1.352172,5.257206,1.822807,2.321193,2.636189,2.633024,0.982998,0.518114,0.128080,0.306104,1.269342,-2.080164,-2.541140,0.760104,-0.550402,-2.429527,0.044688,0.752230,0.119079,-0.403264,1.597901,-0.423613,0.300802,-0.739244,0.666266,-0.138264,-1.897435,1.005163,-1.827034,0.499680,0.951236,1.041719,0.434172,-0.569637,-2.159725,-1.200089,-0.436869,-0.044019,0.341884,0.216819,0.367806,0.440336,-0.379974,-0.532230,0.488940,0.664255,0.076050,-1.076472,0.571799,-0.051388,-0.006142,0.083632,-0.594921,-0.024665,0.519003,-0.247730,0.256428,-0.131643,1.499306,0.367236,-0.217262,-0.950290,-0.592358,-0.824632,0.417456,0.530958,-0.173855,0.189655,0.464845,-0.895781,0.278594,0.029154,-0.141803,0.500085,-0.512404,-0.238833,-0.429215,0.860054,0.407503,1.152907,0.065071,-0.575503,0.950789,0.008994,0.256526,0.415921,-0.654593,-0.204853,0.240657,-0.189212,0.559002,0.144552,0.035368,0.674531,-0.285661,-0.723305,0.601063,0.097456,0.094890,0.523900,-0.118012,-0.865405,0.749517,0.651954,0.198055,-0.107615,-0.404861,0.294643,-0.361705,0.822929,-0.099081,0.079139,-0.105911,-0.291076,-0.118448,0.495592,0.061346,0.332917,-0.221393,0.646960,-0.341091,0.546805,0.000000,0.000000,0.000000,1.981066,0.000000,0.000000,0.577348,4.288819,2.083405,0.577348,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.577348,1.207145,0.000000,0.000000,0.0,1.417123,1.590569,0.577348,1.417123,0.000000,0.0,1.981066,0.000000,0.577348,0.00000,0.577348,1.417123,0.000000,0.000000,0.000000,0.941029,2.983224,0.000000,0.941029,0.000000,2.856900,1.867046,2.856900,3.868667,2.712278,1.981066,2.083405,1.207145,0.941029,0.000000,1.867046,2.176237,1.867046,0.000000,0.000000,0.000000,0.000000,0.000000,5.791753,0.000000,1.417123,0.000000,0.0,3.566078,0.000000,5.001711,4.341034,4.456236,4.583765,1.867046,0.941029,1.207145,1.590569,0.000000,0.941029,1.207145,2.083405,1.207145,4.267145,4.599603,3.059365
5f93d8ffc72f,90.292252,0.146880,-0.271159,-10.908630,-3.214201,5.237493,3.355039,3.375244,2.553044,0.776845,0.540434,3.613944,1.005818,0.078405,2.192658,-0.775795,2.759478,-2.709326,-0.784265,1.249367,0.050844,-2.158159,0.776920,0.989583,-1.557484,-2.504500,1.821522,-0.157095,-0.196657,0.151366,2.136578,-0.454421,-0.938510,-0.812240,-1.540948,0.853244,0.238498,-1.129040,-0.538417,0.603914,-0.147041,-0.573247,-0.776392,-0.213989,0.415222,0.680242,0.659678,1.444883,-0.078884,0.632961,0.490239,-0.703269,0.073249,-1.098591,-0.422700,0.932927,0.367269,0.334818,-0.834014,0.075152,-0.153600,0.953423,-0.731156,0.861282,0.801674,0.063491,0.173407,-0.519551,0.298187,-0.149882,-0.362298,-0.902921,0.125706,-0.269594,-0.097179,0.610862,-1.128629,0.571575,-0.753852,0.661824,-0.173195,-0.646502,0.040034,-0.447189,0.934805,0.504365,-0.828626,0.425130,0.993686,0.641542,-0.242037,-0.775351,-0.370694,-0.619784,0.405900,1.124774,1.002989,-0.739349,-0.102338,-0.287145,0.106964,-0.233670,0.054930,0.829287,0.344867,-0.298101,-0.227078,0.542389,0.671565,0.917085,-0.531678,0.017997,-0.249524,-0.731871,0.016846,-0.254381,0.174406,-0.416123,0.494883,0.170989,0.700896,-0.113854,0.735028,-0.010207,-0.663224,0.219934,1.077817,-0.439766,0.000000,0.000000,0.000000,1.568616,0.667829,0.000000,0.000000,3.756538,1.348073,0.667829,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,1.348073,0.000000,0.000000,0.000000,0.0,1.064711,1.348073,0.667829,2.256541,0.000000,0.0,1.749200,0.000000,0.000000,0.00000,0.000000,1.902107,0.000000,0.000000,0.000000,1.749200,3.446808,0.000000,1.749200,0.667829,2.151762,1.064711,2.151762,3.799973,2.517696,2.437990,1.902107,1.749200,0.000000,1.064711,0.667829,1.064711,1.348073,0.000000,0.000000,0.000000,0.000000,0.000000,5.912421,0.000000,1.568616,0.000000,0.0,2.895912,0.000000,5.232178,4.564348,4.426641,4.583947,1.348073,2.034706,0.667829,1.902107,0.000000,0.000000,1.348073,0.000000,0.667829,4.060443,4.603168,2.256541
7dfa2699d351,93.805962,-12.445390,10.555358,-0.710891,4.603803,-2.805171,1.068830,1.833984,-4.608570,-0.201459,-3.962924,-0.005088,-1.098507,-1.379581,0.761541,-3.148588,2.913516,0.101658,0.022856,1.432993,-2.331087,-1.915733,-1.166527,2.186154,1.076372,0.130604,0.734702,-0.497981,-0.771763,0.693833,-0.246459,-0.041706,-2.748921,-0.384865,0.335162,1.048026,-1.224275,-0.117028,0.753611,0.859828,-0.645335,0.582266,0.291961,0.176579,-0.875614,-0.406391,0.609829,-0.942578,0.067347,-0.048446,-0.238935,-0.415209,-0.761388,-0.452382,-0.111521,-0.087207,-0.761507,-0.158409,0.188839,-0.832592,-0.993856,-0.803717,0.039336,0.039953,-0.668979,-0.403642,0.250717,0.300318,-0.047885,0.289365,-0.833994,0.166385,0.401594,-0.244863,-0.123470,-0.393044,-0.637198,-0.528741,-0.038649,0.172623,0.574302,0.783882,0.108415,-0.170513,0.196612,-0.214809,-0.523400,0.124907,-0.247740,-0.464020,0.480048,0.126403,-0.058947,-0.218305,0.112533,-0.333537,-0.035461,-0.033330,0.426228,0.032991,-0.607229,0.201048,0.317781,-0.583695,-0.012970,-0.550355,-0.422804,0.357776,0.847442,0.232092,0.755277,-0.097016,-0.748176,-0.028148,-0.121889,0.360223,0.581758,0.046662,0.773319,0.151388,0.411271,-0.567683,0.102688,-0.385164,-0.041409,-0.168106,-0.399426,-0.514307,0.000000,1.490374,0.763723,0.453065,0.453065,0.763723,0.000000,3.446033,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,1.000403,0.763723,0.000000,0.0,0.000000,0.000000,0.453065,2.651967,0.453065,0.0,2.522726,0.000000,1.720086,0.00000,0.763723,0.763723,1.000403,0.000000,0.000000,0.000000,1.817774,0.453065,2.426242,0.453065,4.203341,0.453065,2.199862,3.019756,2.199862,2.261439,2.766400,2.729691,1.720086,0.000000,0.763723,0.763723,0.000000,0.000000,1.000403,2.064012,0.000000,1.000403,6.125139,0.763723,1.191648,0.000000,0.0,2.766400,0.763723,4.629310,4.228714,4.075599,4.547748,3.702430,3.389675,0.000000,1.000403,0.000000,0.453065,0.453065,0.000000,2.261439,3.917783,4.331779,1.906763
6d2533edd0e0,88.557716,-11.090774,-5.711870,2.695317,-10.597410,1.775790,1.812456,3.326061,-5.383945,0.696400,1.563926,-0.765326,-0.380334,-0.970265,-0.079944,1.173417,1.223524,-2.820121,0.673842,-1.156821,-0.484202,0.746177,0.037934,2.665554,-0.311520,0.708579,0.255822,0.741675,1.119297,-1.120647,0.214879,1.441037,1.238391,-0.537630,0.270196,-0.453500,0.270915,0.527241,0.677029,0.322304,1.486271,-0.471216,0.408005,-0.155857,-0.568602,-1.050784,-0.343367,0.097307,-0.434764,0.242725,1.342605,-0.545367,-0.621526,-0.858444,1.388187,0.179331,0.581259,-0.357712,-0.341630,-2.161029,-0.510752,-0.489257,0.450156,-1.643882,0.237520,0.157225,0.023587,0.256288,0.167107,0.536678,-0.635633,-0.539151,0.389995,-0.116683,-0.687299,-0.153719,-1.008250,0.364290,-0.454153,0.218373,-0.204755,0.360526,0.145482,0.310088,0.845523,0.275738,0.667759,-0.279717,-0.035411,-0.418841,0.727035,-0.449049,0.945009,-0.552824,0.542864,-0.896657,0.199615,0.214975,-0.331306,0.687645,0.182220,0.394036,-0.574243,0.225955,-0.573348,0.271190,0.337097,-0.561208,0.382440,-0.123613,0.011672,0.159826,0.263654,-0.904831,-1.031118,-0.508579,-0.497193,0.288741,0.251183,0.019236,0.610962,-0.932482,0.343904,1.187420,1.050605,-0.445473,0.542514,0.361364,0.000000,0.000000,0.000000,1.612422,0.000000,0.000000,2.200539,4.069838,1.612422,0.849430,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.849430,0.000000,0.000000,0.000000,0.0,1.301996,0.849430,2.338743,2.666056,0.000000,0.0,3.048074,0.000000,0.000000,0.84943,0.849430,2.040125,0.000000,0.000000,0.000000,0.000000,2.912246,0.000000,0.849430,0.000000,3.048074,0.000000,2.568391,3.810308,2.666056,1.612422,3.109642,2.568391,1.612422,1.848967,1.612422,1.301996,0.849430,0.849430,0.000000,0.849430,0.000000,0.000000,5.697448,0.000000,0.849430,0.000000,0.0,3.684146,0.000000,5.093357,4.842623,4.347486,4.564559,2.040125,1.612422,0.000000,2.040125,0.000000,1.848967,0.000000,2.460145,0.849430,3.500123,4.461901,2.040125
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
a9b4d99f1f50,85.366356,1.080519,-8.010132,0.796445,-3.396977,1.601400,3.458448,-1.217016,0.786072,4.255503,-2.755232,0.385691,2.922162,1.191900,-3.223046,0.136868,0.361752,0.188772,-3.447983,-1.681520,-2.400188,-2.289102,-0.736924,-1.490947,-2.242228,-0.080314,0.956282,-0.271156,1.296366,-1.180623,-0.625098,0.106756,-0.547222,0.133906,-0.668780,-0.767058,-0.745732,1.135188,-0.465981,-1.657057,1.341717,0.513547,-1.946778,0.553342,0.301789,-1.054872,-0.269763,-0.365018,-0.364072,-0.567840,-0.263121,0.884864,0.179559,-0.909130,1.086737,0.577281,-0.178645,0.705079,0.761436,-0.291615,1.398727,0.820755,-0.234059,0.668246,1.058612,1.035108,0.071919,0.264569,0.839250,-0.201995,-0.701525,1.007342,-0.103322,-0.433468,-0.420088,-0.724183,0.907655,-1.337198,0.345874,-1.149339,-0.106747,0.403213,0.246975,0.363520,-0.575282,0.769409,-0.492506,0.239710,-0.410164,0.490752,0.061744,0.008617,1.046630,0.073186,-1.125118,0.094639,-0.663558,0.149662,-0.116115,-0.599895,0.276712,1.081395,-0.170043,-0.012898,0.705159,0.940782,-0.634582,0.659923,-1.086162,-1.151770,0.585043,-0.631075,-0.093587,-0.537206,0.406563,0.055658,0.487570,0.428913,-0.661988,0.700284,1.036009,0.391724,-0.128284,-0.108069,-0.779801,-0.242760,-0.864690,-0.714510,0.000000,0.000000,0.000000,1.303292,0.000000,0.000000,0.000000,4.166040,2.382485,0.000000,0.0,0.638585,0.0,0.0,0.0,0.0,0.0,0.0,1.025180,0.000000,0.000000,0.000000,0.0,1.303292,1.699095,1.303292,1.850468,0.000000,0.0,2.889381,0.000000,0.000000,0.00000,0.000000,2.098066,0.000000,0.000000,0.000000,0.000000,2.296365,0.000000,1.520649,0.000000,3.187953,1.850468,2.202123,3.417575,2.098066,1.981910,2.838399,1.520649,1.025180,0.000000,1.981910,1.850468,0.000000,0.000000,0.000000,0.000000,0.638585,0.000000,6.094541,0.000000,1.850468,0.000000,0.0,2.838399,0.000000,4.732848,4.463661,4.624966,4.366412,2.727904,0.000000,0.000000,1.850468,0.000000,1.025180,1.025180,1.699095,0.638585,3.224165,4.552285,3.150382
0e2c1d0782af,81.205399,6.503594,-10.942855,2.147213,1.046774,-0.758410,1.365951,-1.823899,1.040893,4.043109,-0.935470,0.734516,-1.798569,2.394704,-2.455899,0.074219,-1.513982,0.663842,-4.000921,-0.806638,-1.180238,-1.438603,-3.570682,0.084025,0.765049,1.600097,0.822755,0.711581,-0.265519,0.740322,-1.637914,-1.613475,-1.138829,0.689822,0.029636,0.288107,0.951587,0.428669,-0.449886,-0.377786,0.974956,-0.380031,0.425508,-1.011746,-0.455452,0.245029,1.302600,1.871541,0.595938,0.058742,0.611514,-0.196153,-0.215084,-0.283142,-0.214137,0.169092,-0.434177,2.029846,0.689977,-0.487871,0.855896,-0.865864,-1.414682,0.831691,1.132567,-0.059600,-0.249577,0.120503,0.197125,-0.971337,0.072568,0.084610,-0.130287,-0.783091,-0.516379,0.548477,-0.037989,-0.683084,1.180951,-0.378083,-0.416962,0.518270,0.243468,-0.183180,-1.288607,-0.520516,0.424579,-1.538023,1.318404,0.185988,0.421319,-0.115238,-0.717841,1.218067,0.259176,-0.428944,0.233220,-0.391345,-0.165910,0.077671,-0.265290,-0.145123,0.239169,0.763478,-0.863415,0.175534,0.012452,-0.753019,-0.058406,0.654802,0.731386,0.104266,1.254263,0.174936,0.543565,-0.492913,0.158094,-0.563965,-0.145903,-0.425699,0.258020,1.279177,0.781363,-0.452470,0.157240,-0.326319,0.520935,-0.188791,0.000000,0.000000,0.000000,0.757912,0.000000,0.000000,0.757912,4.055097,2.681418,1.184065,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,1.184065,0.000000,0.000000,0.000000,0.0,1.711139,1.184065,1.897482,2.416301,0.000000,0.0,3.115402,0.757912,0.000000,0.00000,0.757912,2.054495,0.000000,0.000000,0.000000,0.757912,2.600611,0.000000,1.897482,0.000000,2.951835,1.897482,2.190167,3.453569,2.890774,2.681418,2.890774,0.757912,0.000000,0.000000,1.481933,1.184065,1.184065,0.757912,0.000000,0.000000,0.000000,0.000000,5.730957,0.000000,0.757912,0.000000,0.0,2.512697,0.000000,4.912096,4.313453,4.566850,4.601583,3.164475,0.757912,0.000000,1.481933,0.000000,1.481933,0.000000,1.481933,1.184065,4.282626,4.749403,3.705824
a3cbc5aa0ec3,80.439636,7.672957,15.710591,15.690635,8.388031,-5.819407,22.253601,1.320871,0.245843,-0.062292,6.345723,0.137742,0.052358,-0.312850,-1.177019,1.313636,1.003704,0.869457,-3.321661,0.135167,2.845818,1.450652,-0.839094,-2.402138,1.407723,1.448392,-0.527960,-2.169389,-2.744343,0.891098,1.123585,-0.470671,-0.316016,-2.524772,-0.164838,-0.839472,-0.520922,-0.417371,0.004394,0.850240,1.227654,0.005752,-0.838477,-1.400241,-0.268175,-1.651438,-1.231924,-0.246533,-1.907639,-1.266127,-2.909127,-0.469373,0.736211,0.843152,0.183718,-3.830316,1.950835,1.014072,-0.823337,-0.607501,-0.191715,-0.710901,-0.630074,0.592347,-0.718751,0.169095,0.816668,1.121860,-2.033425,0.261176,-1.139817,-0.728157,1.145277,-0.305126,0.406975,-0.976506,-0.048004,-0.647224,0.973937,0.428247,-0.534282,0.333755,2.385608,-0.779210,0.614054,0.110426,0.318968,-0.811001,-0.328077,-0.166389,-0.776845,-1.341591,-1.096393,-0.458532,0.220149,1.133881,-0.534786,-1.963511,-1.143819,0.302970,-0.042720,0.327223,-1.081703,-0.979657,1.474687,0.319095,0.024592,0.303983,1.102567,0.375897,0.207510,-0.226249,0.158661,-0.544645,-0.423482,0.005524,0.287137,0.355960,0.662703,-0.456724,-0.088502,-0.916667,-0.542742,1.477447,-0.058504,1.226709,-0.957758,1.081939,0.000000,2.445630,0.000000,0.000000,0.775127,3.039977,0.000000,4.004784,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,3.370767,0.000000,0.0,0.000000,0.000000,1.206491,3.286875,0.000000,0.0,0.000000,0.000000,0.000000,0.00000,2.218777,0.775127,2.786325,0.000000,0.000000,0.000000,0.775127,0.000000,2.218777,0.775127,3.679805,0.000000,2.218777,2.856019,2.218777,2.338623,0.000000,0.000000,1.206491,0.000000,0.000000,0.775127,1.737563,0.000000,0.000000,0.000000,0.000000,0.000000,5.409255,0.000000,0.775127,0.000000,0.0,1.506875,1.924882,4.198234,3.410214,3.410214,3.286875,4.025902,3.554067,0.000000,1.206491,0.775127,0.000000,0.000000,0.000000,0.000000,6.133726,3.983210,0.000000
75b350243add,84.337151,-3.197404,1.886770,5.541795,13.938139,6.305854,-4.405701,-3.572052,2.690609,0.877849,3.526787,0.154360,1.378285,0.823426,-1.125885,-1.530648,2.033235,1.787222,-0.625443,-0.946741,-1.865516,-0.481350,-0.055673,-2.632052,-1.777798,-0.591003,-0.537863,0.625473,1.930437,0.314187,-0.271688,0.958532,0.146981,0.529214,0.589338,-0.807976,0.102165,-0.331888,-1.149505,-1.115255,-0.161035,-1.412556,-0.314395,0.309252,-1.941458,0.536575,-1.403757,-0.266827,-0.030160,-0.115960,-0.021319,0.303994,-1.470444,-0.438163,-0.909147,-0.274562,-2.081506,-0.795673,0.653863,-0.450512,0.108288,0.564409,-1.183599,-0.209260,0.697914,1.705868,-0.212448,0.650631,-0.346046,1.162816,-0.941122,0.599168,0.062152,0.665024,1.329877,-0.512315,0.565533,0.511597,0.970352,-0.133340,-1.347535,-1.277951,0.715768,0.444348,-0.105136,0.777697,0.496998,0.111410,-0.003564,1.091448,-0.480395,-0.485875,0.988169,-1.312475,-0.119006,0.010417,0.689612,-0.479114,-0.117106,0.348491,0.114891,-0.346227,-0.482128,-0.231879,0.078575,-0.569172,-0.349015,-0.290385,0.408152,-1.147283,0.550259,0.507853,0.008024,0.514796,0.407690,-0.232874,-0.045954,-0.001800,-0.021512,0.136286,-0.362910,-0.325278,-0.034947,0.062858,0.903601,-0.055573,-0.527762,-0.357432,0.942551,0.000000,1.592557,1.419014,1.419014,0.578443,0.000000,3.260568,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.578443,0.942551,0.0,0.578443,0.000000,0.942551,2.414340,0.000000,0.0,2.482040,0.000000,1.419014,0.00000,0.000000,1.208895,0.578443,1.740389,0.000000,0.578443,0.942551,1.208895,2.903163,0.000000,4.633018,0.000000,2.178450,3.499946,1.740389,1.592557,1.983217,0.000000,0.578443,0.000000,0.942551,1.869156,0.000000,0.000000,0.000000,2.263414,0.000000,0.000000,6.216047,0.000000,1.419014,0.578443,0.0,2.178450,1.419014,4.537156,4.236187,4.063508,4.165798,4.141190,3.451483,0.000000,0.000000,1.208895,0.000000,0.000000,0.000000,2.859252,1.869156,4.280502,0.578443


In [34]:
# Load the competition metadata table; head() below shows the columns
# cell_id, day, donor, cell_type and technology.
metadata = pd.read_csv(
    os.path.join(
        '/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di93zoj/neurips_competition_data/',
        'metadata.csv',
    )
)
metadata.head()

Unnamed: 0,cell_id,day,donor,cell_type,technology
0,c2150f55becb,2,27678,HSC,citeseq
1,65b7edf8a4da,2,27678,HSC,citeseq
2,c1b26cb1057b,2,27678,EryP,citeseq
3,917168fa6f83,2,27678,NeuP,citeseq
4,2b29feeca86d,2,27678,EryP,citeseq


In [35]:
# Attach the annotated cell type to every test cell.
#
# Both steps rebind X_test_cell_ids, so they are guarded to keep this cell
# idempotent: re-running it must neither add a second 'cell_id' column
# (reset_index on the already-reset frame) nor merge 'cell_type' twice.

# Step 1: move the cell ids out of the index into a regular 'cell_id' column.
if 'cell_id' not in X_test_cell_ids.columns:
    X_test_cell_ids = X_test_cell_ids.reset_index().rename(columns = {'index': 'cell_id'})

# Step 2: left join keeps every test cell; cells without a match in
# `metadata` end up with a missing cell_type.
if 'cell_type' not in X_test_cell_ids.columns:
    X_test_cell_ids = X_test_cell_ids.merge(metadata[['cell_id', 'cell_type']], on = 'cell_id', how = 'left')

In [36]:
# Number of test cells per annotated cell type. value_counts() leaves out
# missing labels by default, so cells without a cell_type are not counted here.
X_test_cell_ids.loc[:, 'cell_type'].value_counts()

cell_type
HSC     9451
MasP    9064
EryP    8788
NeuP    7719
MkP     4844
MoP     1215
BP       106
Name: count, dtype: int64

In [37]:
# Inspect the merged frame: 'cell_id' is now a regular column and 'cell_type'
# has been appended as the last column (it can be empty for some rows).
X_test_cell_ids

Unnamed: 0,cell_id,base_svd_0,base_svd_1,base_svd_2,base_svd_3,base_svd_4,base_svd_5,base_svd_6,base_svd_7,base_svd_8,base_svd_9,base_svd_10,base_svd_11,base_svd_12,base_svd_13,base_svd_14,base_svd_15,base_svd_16,base_svd_17,base_svd_18,base_svd_19,base_svd_20,base_svd_21,base_svd_22,base_svd_23,base_svd_24,base_svd_25,base_svd_26,base_svd_27,base_svd_28,base_svd_29,base_svd_30,base_svd_31,base_svd_32,base_svd_33,base_svd_34,base_svd_35,base_svd_36,base_svd_37,base_svd_38,base_svd_39,base_svd_40,base_svd_41,base_svd_42,base_svd_43,base_svd_44,base_svd_45,base_svd_46,base_svd_47,base_svd_48,base_svd_49,base_svd_50,base_svd_51,base_svd_52,base_svd_53,base_svd_54,base_svd_55,base_svd_56,base_svd_57,base_svd_58,base_svd_59,base_svd_60,base_svd_61,base_svd_62,base_svd_63,base_svd_64,base_svd_65,base_svd_66,base_svd_67,base_svd_68,base_svd_69,base_svd_70,base_svd_71,base_svd_72,base_svd_73,base_svd_74,base_svd_75,base_svd_76,base_svd_77,base_svd_78,base_svd_79,base_svd_80,base_svd_81,base_svd_82,base_svd_83,base_svd_84,base_svd_85,base_svd_86,base_svd_87,base_svd_88,base_svd_89,base_svd_90,base_svd_91,base_svd_92,base_svd_93,base_svd_94,base_svd_95,base_svd_96,base_svd_97,base_svd_98,base_svd_99,base_svd_100,base_svd_101,base_svd_102,base_svd_103,base_svd_104,base_svd_105,base_svd_106,base_svd_107,base_svd_108,base_svd_109,base_svd_110,base_svd_111,base_svd_112,base_svd_113,base_svd_114,base_svd_115,base_svd_116,base_svd_117,base_svd_118,base_svd_119,base_svd_120,base_svd_121,base_svd_122,base_svd_123,base_svd_124,base_svd_125,base_svd_126,base_svd_127,imp_0,imp_1,imp_2,imp_3,imp_4,imp_5,imp_6,imp_7,imp_8,imp_9,imp_10,imp_11,imp_12,imp_13,imp_14,imp_15,imp_16,imp_17,imp_18,imp_19,imp_20,imp_21,imp_22,imp_23,imp_24,imp_25,imp_26,imp_27,imp_28,imp_29,imp_30,imp_31,imp_32,imp_33,imp_34,imp_35,imp_36,imp_37,imp_38,imp_39,imp_40,imp_41,imp_42,imp_43,imp_44,imp_45,imp_46,imp_47,imp_48,imp_49,imp_50,imp_51,imp_52,imp_53,imp_54,imp_55,imp_56,imp_57,imp_58,imp_59,imp_60,imp_61,imp
_62,imp_63,imp_64,imp_65,imp_66,imp_67,imp_68,imp_69,imp_70,imp_71,imp_72,imp_73,imp_74,imp_75,imp_76,imp_77,imp_78,imp_79,imp_80,imp_81,imp_82,imp_83,cell_type
0,83d6659a6a32,94.056297,-7.824483,0.390148,-11.240480,1.049213,-6.360567,-0.291657,-0.807333,1.523107,4.391570,1.058334,1.107465,2.794737,0.232357,0.485993,0.610800,1.087533,-3.675157,-2.152177,0.258529,-0.241675,-0.281301,-0.892517,3.144524,-1.095826,0.484069,1.941438,-2.086771,0.126985,-0.265173,1.782843,0.401962,-0.815250,-1.178362,0.804193,-0.375622,1.732931,0.167944,-0.211067,0.322198,-0.055706,0.074955,-0.984684,0.062712,0.836429,-0.737014,-0.763474,-0.775506,-0.968059,0.333593,-0.577777,-0.923708,0.634263,0.665948,-0.189621,0.493161,-0.988103,-0.436438,0.022296,0.698116,-0.026895,-0.644371,0.149706,0.742314,0.001074,-0.240200,0.196376,-0.087378,-0.114259,0.454667,-1.015756,-0.507654,-0.075879,0.975734,0.691606,-0.203795,-0.039084,-0.425149,-0.519585,0.167085,-0.137737,-1.078830,0.527103,-0.183130,0.473650,-0.550963,-0.575732,-0.393081,1.038305,-0.292080,-0.802224,0.014176,0.436137,-0.697561,0.277782,-0.157537,-0.256823,-0.159989,0.244960,-0.068922,-0.413599,0.258720,0.399672,-0.208129,0.502469,0.384433,-0.383426,-0.506806,-0.165553,0.475413,0.240959,0.584796,-0.482804,0.413303,-0.030615,-0.264916,-0.404241,-0.620478,0.027995,-1.228871,-0.297079,-0.250295,0.024182,0.137247,-0.458344,-0.103292,-0.283405,-0.316505,0.000000,0.508287,0.000000,1.094377,0.000000,0.000000,1.294662,4.059924,2.392116,1.461449,0.0,0.508287,0.0,0.0,0.0,0.0,0.0,0.0,1.294662,1.294662,0.000000,0.000000,0.0,2.031354,1.461449,0.508287,1.940462,0.000000,0.0,2.559081,0.000000,0.000000,0.00000,0.000000,1.940462,0.000000,0.000000,0.000000,1.094377,2.865681,0.508287,1.461449,0.000000,3.337905,1.604354,2.559081,3.604731,2.609067,2.745583,2.450923,2.392116,2.114670,1.094377,0.508287,2.031354,1.840474,1.094377,0.000000,0.508287,0.000000,0.000000,5.809778,0.000000,2.191574,0.000000,0.0,3.449107,0.000000,4.843113,4.489811,4.320063,4.511869,1.094377,1.461449,1.294662,1.604354,0.000000,1.294662,1.294662,2.114670,1.461449,4.198429,4.615403,3.129412,
1,d98594f13d2e,92.367874,-4.847517,0.514445,-8.376771,-1.919603,-4.256521,-1.352172,5.257206,1.822807,2.321193,2.636189,2.633024,0.982998,0.518114,0.128080,0.306104,1.269342,-2.080164,-2.541140,0.760104,-0.550402,-2.429527,0.044688,0.752230,0.119079,-0.403264,1.597901,-0.423613,0.300802,-0.739244,0.666266,-0.138264,-1.897435,1.005163,-1.827034,0.499680,0.951236,1.041719,0.434172,-0.569637,-2.159725,-1.200089,-0.436869,-0.044019,0.341884,0.216819,0.367806,0.440336,-0.379974,-0.532230,0.488940,0.664255,0.076050,-1.076472,0.571799,-0.051388,-0.006142,0.083632,-0.594921,-0.024665,0.519003,-0.247730,0.256428,-0.131643,1.499306,0.367236,-0.217262,-0.950290,-0.592358,-0.824632,0.417456,0.530958,-0.173855,0.189655,0.464845,-0.895781,0.278594,0.029154,-0.141803,0.500085,-0.512404,-0.238833,-0.429215,0.860054,0.407503,1.152907,0.065071,-0.575503,0.950789,0.008994,0.256526,0.415921,-0.654593,-0.204853,0.240657,-0.189212,0.559002,0.144552,0.035368,0.674531,-0.285661,-0.723305,0.601063,0.097456,0.094890,0.523900,-0.118012,-0.865405,0.749517,0.651954,0.198055,-0.107615,-0.404861,0.294643,-0.361705,0.822929,-0.099081,0.079139,-0.105911,-0.291076,-0.118448,0.495592,0.061346,0.332917,-0.221393,0.646960,-0.341091,0.546805,0.000000,0.000000,0.000000,1.981066,0.000000,0.000000,0.577348,4.288819,2.083405,0.577348,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.577348,1.207145,0.000000,0.000000,0.0,1.417123,1.590569,0.577348,1.417123,0.000000,0.0,1.981066,0.000000,0.577348,0.00000,0.577348,1.417123,0.000000,0.000000,0.000000,0.941029,2.983224,0.000000,0.941029,0.000000,2.856900,1.867046,2.856900,3.868667,2.712278,1.981066,2.083405,1.207145,0.941029,0.000000,1.867046,2.176237,1.867046,0.000000,0.000000,0.000000,0.000000,0.000000,5.791753,0.000000,1.417123,0.000000,0.0,3.566078,0.000000,5.001711,4.341034,4.456236,4.583765,1.867046,0.941029,1.207145,1.590569,0.000000,0.941029,1.207145,2.083405,1.207145,4.267145,4.599603,3.059365,
2,5f93d8ffc72f,90.292252,0.146880,-0.271159,-10.908630,-3.214201,5.237493,3.355039,3.375244,2.553044,0.776845,0.540434,3.613944,1.005818,0.078405,2.192658,-0.775795,2.759478,-2.709326,-0.784265,1.249367,0.050844,-2.158159,0.776920,0.989583,-1.557484,-2.504500,1.821522,-0.157095,-0.196657,0.151366,2.136578,-0.454421,-0.938510,-0.812240,-1.540948,0.853244,0.238498,-1.129040,-0.538417,0.603914,-0.147041,-0.573247,-0.776392,-0.213989,0.415222,0.680242,0.659678,1.444883,-0.078884,0.632961,0.490239,-0.703269,0.073249,-1.098591,-0.422700,0.932927,0.367269,0.334818,-0.834014,0.075152,-0.153600,0.953423,-0.731156,0.861282,0.801674,0.063491,0.173407,-0.519551,0.298187,-0.149882,-0.362298,-0.902921,0.125706,-0.269594,-0.097179,0.610862,-1.128629,0.571575,-0.753852,0.661824,-0.173195,-0.646502,0.040034,-0.447189,0.934805,0.504365,-0.828626,0.425130,0.993686,0.641542,-0.242037,-0.775351,-0.370694,-0.619784,0.405900,1.124774,1.002989,-0.739349,-0.102338,-0.287145,0.106964,-0.233670,0.054930,0.829287,0.344867,-0.298101,-0.227078,0.542389,0.671565,0.917085,-0.531678,0.017997,-0.249524,-0.731871,0.016846,-0.254381,0.174406,-0.416123,0.494883,0.170989,0.700896,-0.113854,0.735028,-0.010207,-0.663224,0.219934,1.077817,-0.439766,0.000000,0.000000,0.000000,1.568616,0.667829,0.000000,0.000000,3.756538,1.348073,0.667829,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,1.348073,0.000000,0.000000,0.000000,0.0,1.064711,1.348073,0.667829,2.256541,0.000000,0.0,1.749200,0.000000,0.000000,0.00000,0.000000,1.902107,0.000000,0.000000,0.000000,1.749200,3.446808,0.000000,1.749200,0.667829,2.151762,1.064711,2.151762,3.799973,2.517696,2.437990,1.902107,1.749200,0.000000,1.064711,0.667829,1.064711,1.348073,0.000000,0.000000,0.000000,0.000000,0.000000,5.912421,0.000000,1.568616,0.000000,0.0,2.895912,0.000000,5.232178,4.564348,4.426641,4.583947,1.348073,2.034706,0.667829,1.902107,0.000000,0.000000,1.348073,0.000000,0.667829,4.060443,4.603168,2.256541,
3,7dfa2699d351,93.805962,-12.445390,10.555358,-0.710891,4.603803,-2.805171,1.068830,1.833984,-4.608570,-0.201459,-3.962924,-0.005088,-1.098507,-1.379581,0.761541,-3.148588,2.913516,0.101658,0.022856,1.432993,-2.331087,-1.915733,-1.166527,2.186154,1.076372,0.130604,0.734702,-0.497981,-0.771763,0.693833,-0.246459,-0.041706,-2.748921,-0.384865,0.335162,1.048026,-1.224275,-0.117028,0.753611,0.859828,-0.645335,0.582266,0.291961,0.176579,-0.875614,-0.406391,0.609829,-0.942578,0.067347,-0.048446,-0.238935,-0.415209,-0.761388,-0.452382,-0.111521,-0.087207,-0.761507,-0.158409,0.188839,-0.832592,-0.993856,-0.803717,0.039336,0.039953,-0.668979,-0.403642,0.250717,0.300318,-0.047885,0.289365,-0.833994,0.166385,0.401594,-0.244863,-0.123470,-0.393044,-0.637198,-0.528741,-0.038649,0.172623,0.574302,0.783882,0.108415,-0.170513,0.196612,-0.214809,-0.523400,0.124907,-0.247740,-0.464020,0.480048,0.126403,-0.058947,-0.218305,0.112533,-0.333537,-0.035461,-0.033330,0.426228,0.032991,-0.607229,0.201048,0.317781,-0.583695,-0.012970,-0.550355,-0.422804,0.357776,0.847442,0.232092,0.755277,-0.097016,-0.748176,-0.028148,-0.121889,0.360223,0.581758,0.046662,0.773319,0.151388,0.411271,-0.567683,0.102688,-0.385164,-0.041409,-0.168106,-0.399426,-0.514307,0.000000,1.490374,0.763723,0.453065,0.453065,0.763723,0.000000,3.446033,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,1.000403,0.763723,0.000000,0.0,0.000000,0.000000,0.453065,2.651967,0.453065,0.0,2.522726,0.000000,1.720086,0.00000,0.763723,0.763723,1.000403,0.000000,0.000000,0.000000,1.817774,0.453065,2.426242,0.453065,4.203341,0.453065,2.199862,3.019756,2.199862,2.261439,2.766400,2.729691,1.720086,0.000000,0.763723,0.763723,0.000000,0.000000,1.000403,2.064012,0.000000,1.000403,6.125139,0.763723,1.191648,0.000000,0.0,2.766400,0.763723,4.629310,4.228714,4.075599,4.547748,3.702430,3.389675,0.000000,1.000403,0.000000,0.453065,0.453065,0.000000,2.261439,3.917783,4.331779,1.906763,
4,6d2533edd0e0,88.557716,-11.090774,-5.711870,2.695317,-10.597410,1.775790,1.812456,3.326061,-5.383945,0.696400,1.563926,-0.765326,-0.380334,-0.970265,-0.079944,1.173417,1.223524,-2.820121,0.673842,-1.156821,-0.484202,0.746177,0.037934,2.665554,-0.311520,0.708579,0.255822,0.741675,1.119297,-1.120647,0.214879,1.441037,1.238391,-0.537630,0.270196,-0.453500,0.270915,0.527241,0.677029,0.322304,1.486271,-0.471216,0.408005,-0.155857,-0.568602,-1.050784,-0.343367,0.097307,-0.434764,0.242725,1.342605,-0.545367,-0.621526,-0.858444,1.388187,0.179331,0.581259,-0.357712,-0.341630,-2.161029,-0.510752,-0.489257,0.450156,-1.643882,0.237520,0.157225,0.023587,0.256288,0.167107,0.536678,-0.635633,-0.539151,0.389995,-0.116683,-0.687299,-0.153719,-1.008250,0.364290,-0.454153,0.218373,-0.204755,0.360526,0.145482,0.310088,0.845523,0.275738,0.667759,-0.279717,-0.035411,-0.418841,0.727035,-0.449049,0.945009,-0.552824,0.542864,-0.896657,0.199615,0.214975,-0.331306,0.687645,0.182220,0.394036,-0.574243,0.225955,-0.573348,0.271190,0.337097,-0.561208,0.382440,-0.123613,0.011672,0.159826,0.263654,-0.904831,-1.031118,-0.508579,-0.497193,0.288741,0.251183,0.019236,0.610962,-0.932482,0.343904,1.187420,1.050605,-0.445473,0.542514,0.361364,0.000000,0.000000,0.000000,1.612422,0.000000,0.000000,2.200539,4.069838,1.612422,0.849430,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.849430,0.000000,0.000000,0.000000,0.0,1.301996,0.849430,2.338743,2.666056,0.000000,0.0,3.048074,0.000000,0.000000,0.84943,0.849430,2.040125,0.000000,0.000000,0.000000,0.000000,2.912246,0.000000,0.849430,0.000000,3.048074,0.000000,2.568391,3.810308,2.666056,1.612422,3.109642,2.568391,1.612422,1.848967,1.612422,1.301996,0.849430,0.849430,0.000000,0.849430,0.000000,0.000000,5.697448,0.000000,0.849430,0.000000,0.0,3.684146,0.000000,5.093357,4.842623,4.347486,4.564559,2.040125,1.612422,0.000000,2.040125,0.000000,1.848967,0.000000,2.460145,0.849430,3.500123,4.461901,2.040125,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48198,a9b4d99f1f50,85.366356,1.080519,-8.010132,0.796445,-3.396977,1.601400,3.458448,-1.217016,0.786072,4.255503,-2.755232,0.385691,2.922162,1.191900,-3.223046,0.136868,0.361752,0.188772,-3.447983,-1.681520,-2.400188,-2.289102,-0.736924,-1.490947,-2.242228,-0.080314,0.956282,-0.271156,1.296366,-1.180623,-0.625098,0.106756,-0.547222,0.133906,-0.668780,-0.767058,-0.745732,1.135188,-0.465981,-1.657057,1.341717,0.513547,-1.946778,0.553342,0.301789,-1.054872,-0.269763,-0.365018,-0.364072,-0.567840,-0.263121,0.884864,0.179559,-0.909130,1.086737,0.577281,-0.178645,0.705079,0.761436,-0.291615,1.398727,0.820755,-0.234059,0.668246,1.058612,1.035108,0.071919,0.264569,0.839250,-0.201995,-0.701525,1.007342,-0.103322,-0.433468,-0.420088,-0.724183,0.907655,-1.337198,0.345874,-1.149339,-0.106747,0.403213,0.246975,0.363520,-0.575282,0.769409,-0.492506,0.239710,-0.410164,0.490752,0.061744,0.008617,1.046630,0.073186,-1.125118,0.094639,-0.663558,0.149662,-0.116115,-0.599895,0.276712,1.081395,-0.170043,-0.012898,0.705159,0.940782,-0.634582,0.659923,-1.086162,-1.151770,0.585043,-0.631075,-0.093587,-0.537206,0.406563,0.055658,0.487570,0.428913,-0.661988,0.700284,1.036009,0.391724,-0.128284,-0.108069,-0.779801,-0.242760,-0.864690,-0.714510,0.000000,0.000000,0.000000,1.303292,0.000000,0.000000,0.000000,4.166040,2.382485,0.000000,0.0,0.638585,0.0,0.0,0.0,0.0,0.0,0.0,1.025180,0.000000,0.000000,0.000000,0.0,1.303292,1.699095,1.303292,1.850468,0.000000,0.0,2.889381,0.000000,0.000000,0.00000,0.000000,2.098066,0.000000,0.000000,0.000000,0.000000,2.296365,0.000000,1.520649,0.000000,3.187953,1.850468,2.202123,3.417575,2.098066,1.981910,2.838399,1.520649,1.025180,0.000000,1.981910,1.850468,0.000000,0.000000,0.000000,0.000000,0.638585,0.000000,6.094541,0.000000,1.850468,0.000000,0.0,2.838399,0.000000,4.732848,4.463661,4.624966,4.366412,2.727904,0.000000,0.000000,1.850468,0.000000,1.025180,1.025180,1.699095,0.638585,3.224165,4.552285,3.150382,HSC
48199,0e2c1d0782af,81.205399,6.503594,-10.942855,2.147213,1.046774,-0.758410,1.365951,-1.823899,1.040893,4.043109,-0.935470,0.734516,-1.798569,2.394704,-2.455899,0.074219,-1.513982,0.663842,-4.000921,-0.806638,-1.180238,-1.438603,-3.570682,0.084025,0.765049,1.600097,0.822755,0.711581,-0.265519,0.740322,-1.637914,-1.613475,-1.138829,0.689822,0.029636,0.288107,0.951587,0.428669,-0.449886,-0.377786,0.974956,-0.380031,0.425508,-1.011746,-0.455452,0.245029,1.302600,1.871541,0.595938,0.058742,0.611514,-0.196153,-0.215084,-0.283142,-0.214137,0.169092,-0.434177,2.029846,0.689977,-0.487871,0.855896,-0.865864,-1.414682,0.831691,1.132567,-0.059600,-0.249577,0.120503,0.197125,-0.971337,0.072568,0.084610,-0.130287,-0.783091,-0.516379,0.548477,-0.037989,-0.683084,1.180951,-0.378083,-0.416962,0.518270,0.243468,-0.183180,-1.288607,-0.520516,0.424579,-1.538023,1.318404,0.185988,0.421319,-0.115238,-0.717841,1.218067,0.259176,-0.428944,0.233220,-0.391345,-0.165910,0.077671,-0.265290,-0.145123,0.239169,0.763478,-0.863415,0.175534,0.012452,-0.753019,-0.058406,0.654802,0.731386,0.104266,1.254263,0.174936,0.543565,-0.492913,0.158094,-0.563965,-0.145903,-0.425699,0.258020,1.279177,0.781363,-0.452470,0.157240,-0.326319,0.520935,-0.188791,0.000000,0.000000,0.000000,0.757912,0.000000,0.000000,0.757912,4.055097,2.681418,1.184065,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,1.184065,0.000000,0.000000,0.000000,0.0,1.711139,1.184065,1.897482,2.416301,0.000000,0.0,3.115402,0.757912,0.000000,0.00000,0.757912,2.054495,0.000000,0.000000,0.000000,0.757912,2.600611,0.000000,1.897482,0.000000,2.951835,1.897482,2.190167,3.453569,2.890774,2.681418,2.890774,0.757912,0.000000,0.000000,1.481933,1.184065,1.184065,0.757912,0.000000,0.000000,0.000000,0.000000,5.730957,0.000000,0.757912,0.000000,0.0,2.512697,0.000000,4.912096,4.313453,4.566850,4.601583,3.164475,0.757912,0.000000,1.481933,0.000000,1.481933,0.000000,1.481933,1.184065,4.282626,4.749403,3.705824,HSC
48200,a3cbc5aa0ec3,80.439636,7.672957,15.710591,15.690635,8.388031,-5.819407,22.253601,1.320871,0.245843,-0.062292,6.345723,0.137742,0.052358,-0.312850,-1.177019,1.313636,1.003704,0.869457,-3.321661,0.135167,2.845818,1.450652,-0.839094,-2.402138,1.407723,1.448392,-0.527960,-2.169389,-2.744343,0.891098,1.123585,-0.470671,-0.316016,-2.524772,-0.164838,-0.839472,-0.520922,-0.417371,0.004394,0.850240,1.227654,0.005752,-0.838477,-1.400241,-0.268175,-1.651438,-1.231924,-0.246533,-1.907639,-1.266127,-2.909127,-0.469373,0.736211,0.843152,0.183718,-3.830316,1.950835,1.014072,-0.823337,-0.607501,-0.191715,-0.710901,-0.630074,0.592347,-0.718751,0.169095,0.816668,1.121860,-2.033425,0.261176,-1.139817,-0.728157,1.145277,-0.305126,0.406975,-0.976506,-0.048004,-0.647224,0.973937,0.428247,-0.534282,0.333755,2.385608,-0.779210,0.614054,0.110426,0.318968,-0.811001,-0.328077,-0.166389,-0.776845,-1.341591,-1.096393,-0.458532,0.220149,1.133881,-0.534786,-1.963511,-1.143819,0.302970,-0.042720,0.327223,-1.081703,-0.979657,1.474687,0.319095,0.024592,0.303983,1.102567,0.375897,0.207510,-0.226249,0.158661,-0.544645,-0.423482,0.005524,0.287137,0.355960,0.662703,-0.456724,-0.088502,-0.916667,-0.542742,1.477447,-0.058504,1.226709,-0.957758,1.081939,0.000000,2.445630,0.000000,0.000000,0.775127,3.039977,0.000000,4.004784,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,3.370767,0.000000,0.0,0.000000,0.000000,1.206491,3.286875,0.000000,0.0,0.000000,0.000000,0.000000,0.00000,2.218777,0.775127,2.786325,0.000000,0.000000,0.000000,0.775127,0.000000,2.218777,0.775127,3.679805,0.000000,2.218777,2.856019,2.218777,2.338623,0.000000,0.000000,1.206491,0.000000,0.000000,0.775127,1.737563,0.000000,0.000000,0.000000,0.000000,0.000000,5.409255,0.000000,0.775127,0.000000,0.0,1.506875,1.924882,4.198234,3.410214,3.410214,3.286875,4.025902,3.554067,0.000000,1.206491,0.775127,0.000000,0.000000,0.000000,0.000000,6.133726,3.983210,0.000000,MkP
48201,75b350243add,84.337151,-3.197404,1.886770,5.541795,13.938139,6.305854,-4.405701,-3.572052,2.690609,0.877849,3.526787,0.154360,1.378285,0.823426,-1.125885,-1.530648,2.033235,1.787222,-0.625443,-0.946741,-1.865516,-0.481350,-0.055673,-2.632052,-1.777798,-0.591003,-0.537863,0.625473,1.930437,0.314187,-0.271688,0.958532,0.146981,0.529214,0.589338,-0.807976,0.102165,-0.331888,-1.149505,-1.115255,-0.161035,-1.412556,-0.314395,0.309252,-1.941458,0.536575,-1.403757,-0.266827,-0.030160,-0.115960,-0.021319,0.303994,-1.470444,-0.438163,-0.909147,-0.274562,-2.081506,-0.795673,0.653863,-0.450512,0.108288,0.564409,-1.183599,-0.209260,0.697914,1.705868,-0.212448,0.650631,-0.346046,1.162816,-0.941122,0.599168,0.062152,0.665024,1.329877,-0.512315,0.565533,0.511597,0.970352,-0.133340,-1.347535,-1.277951,0.715768,0.444348,-0.105136,0.777697,0.496998,0.111410,-0.003564,1.091448,-0.480395,-0.485875,0.988169,-1.312475,-0.119006,0.010417,0.689612,-0.479114,-0.117106,0.348491,0.114891,-0.346227,-0.482128,-0.231879,0.078575,-0.569172,-0.349015,-0.290385,0.408152,-1.147283,0.550259,0.507853,0.008024,0.514796,0.407690,-0.232874,-0.045954,-0.001800,-0.021512,0.136286,-0.362910,-0.325278,-0.034947,0.062858,0.903601,-0.055573,-0.527762,-0.357432,0.942551,0.000000,1.592557,1.419014,1.419014,0.578443,0.000000,3.260568,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.578443,0.942551,0.0,0.578443,0.000000,0.942551,2.414340,0.000000,0.0,2.482040,0.000000,1.419014,0.00000,0.000000,1.208895,0.578443,1.740389,0.000000,0.578443,0.942551,1.208895,2.903163,0.000000,4.633018,0.000000,2.178450,3.499946,1.740389,1.592557,1.983217,0.000000,0.578443,0.000000,0.942551,1.869156,0.000000,0.000000,0.000000,2.263414,0.000000,0.000000,6.216047,0.000000,1.419014,0.578443,0.0,2.178450,1.419014,4.537156,4.236187,4.063508,4.165798,4.141190,3.451483,0.000000,0.000000,1.208895,0.000000,0.000000,0.000000,2.859252,1.869156,4.280502,0.578443,EryP


In [128]:
samples_per_cell_type = 5
# Fixed seed so that re-running this cell (or Restart & Run All) picks the same
# cells every time; unseeded sampling explains a different subset on each run.
shap_sample_seed = 42
shap_sample_rng = np.random.RandomState(shap_sample_seed)

grouped = X_test_cell_ids.groupby('cell_type')

# Draw up to `samples_per_cell_type` rows (without replacement) from every
# cell type. A group smaller than the requested size contributes all of its
# rows instead of making DataFrame.sample raise a ValueError.
sampled_groups = [
    group.sample(
        n=min(samples_per_cell_type, len(group)),
        replace=False,
        random_state=shap_sample_rng,
    )
    for _, group in grouped
]

# Concatenate once rather than growing a frame inside the loop: repeated
# pd.concat is quadratic, and concatenating onto an empty DataFrame is
# deprecated in recent pandas. With no groups, fall back to an empty frame
# that keeps the input's columns.
if sampled_groups:
    X_test_shap = pd.concat(sampled_groups)
else:
    X_test_shap = X_test_cell_ids.head(0)

X_test_shap = X_test_shap.reset_index(drop=True)
print(X_test_shap.shape)
X_test_shap.head()

(35, 214)


Unnamed: 0,cell_id,base_svd_0,base_svd_1,base_svd_2,base_svd_3,base_svd_4,base_svd_5,base_svd_6,base_svd_7,base_svd_8,base_svd_9,base_svd_10,base_svd_11,base_svd_12,base_svd_13,base_svd_14,base_svd_15,base_svd_16,base_svd_17,base_svd_18,base_svd_19,base_svd_20,base_svd_21,base_svd_22,base_svd_23,base_svd_24,base_svd_25,base_svd_26,base_svd_27,base_svd_28,base_svd_29,base_svd_30,base_svd_31,base_svd_32,base_svd_33,base_svd_34,base_svd_35,base_svd_36,base_svd_37,base_svd_38,base_svd_39,base_svd_40,base_svd_41,base_svd_42,base_svd_43,base_svd_44,base_svd_45,base_svd_46,base_svd_47,base_svd_48,base_svd_49,base_svd_50,base_svd_51,base_svd_52,base_svd_53,base_svd_54,base_svd_55,base_svd_56,base_svd_57,base_svd_58,base_svd_59,base_svd_60,base_svd_61,base_svd_62,base_svd_63,base_svd_64,base_svd_65,base_svd_66,base_svd_67,base_svd_68,base_svd_69,base_svd_70,base_svd_71,base_svd_72,base_svd_73,base_svd_74,base_svd_75,base_svd_76,base_svd_77,base_svd_78,base_svd_79,base_svd_80,base_svd_81,base_svd_82,base_svd_83,base_svd_84,base_svd_85,base_svd_86,base_svd_87,base_svd_88,base_svd_89,base_svd_90,base_svd_91,base_svd_92,base_svd_93,base_svd_94,base_svd_95,base_svd_96,base_svd_97,base_svd_98,base_svd_99,base_svd_100,base_svd_101,base_svd_102,base_svd_103,base_svd_104,base_svd_105,base_svd_106,base_svd_107,base_svd_108,base_svd_109,base_svd_110,base_svd_111,base_svd_112,base_svd_113,base_svd_114,base_svd_115,base_svd_116,base_svd_117,base_svd_118,base_svd_119,base_svd_120,base_svd_121,base_svd_122,base_svd_123,base_svd_124,base_svd_125,base_svd_126,base_svd_127,imp_0,imp_1,imp_2,imp_3,imp_4,imp_5,imp_6,imp_7,imp_8,imp_9,imp_10,imp_11,imp_12,imp_13,imp_14,imp_15,imp_16,imp_17,imp_18,imp_19,imp_20,imp_21,imp_22,imp_23,imp_24,imp_25,imp_26,imp_27,imp_28,imp_29,imp_30,imp_31,imp_32,imp_33,imp_34,imp_35,imp_36,imp_37,imp_38,imp_39,imp_40,imp_41,imp_42,imp_43,imp_44,imp_45,imp_46,imp_47,imp_48,imp_49,imp_50,imp_51,imp_52,imp_53,imp_54,imp_55,imp_56,imp_57,imp_58,imp_59,imp_60,imp_61,imp
_62,imp_63,imp_64,imp_65,imp_66,imp_67,imp_68,imp_69,imp_70,imp_71,imp_72,imp_73,imp_74,imp_75,imp_76,imp_77,imp_78,imp_79,imp_80,imp_81,imp_82,imp_83,cell_type
0,6394c11656fe,91.398605,-2.573413,5.897174,0.835887,-4.996771,2.661109,-1.900818,-4.152645,1.443682,-3.602244,-1.747702,3.906802,1.733961,-2.119727,-1.108597,-3.133109,-4.540565,1.575497,-0.689293,-2.578106,2.50742,-1.177895,-1.675395,-1.366361,-1.123123,1.371597,1.645501,1.258302,-0.117805,-0.994187,-0.609635,1.246436,3.17098,-2.225565,-0.601513,-0.493538,-1.269298,0.059885,-1.085594,-0.917217,-0.597429,-0.815437,-0.426063,-1.677097,0.761961,-0.078423,-0.31013,0.488922,-0.610098,-0.611349,0.95728,-0.177487,1.554586,-0.544638,1.78608,0.542536,-0.162515,-1.671796,0.265801,-0.201104,0.867177,0.172407,-0.740333,-0.255071,0.307698,-1.323099,0.102038,-1.199427,-0.751893,-0.150644,-0.102823,0.159292,-0.199846,0.736028,-0.043564,-1.187134,-0.230672,-0.341662,-1.036231,0.625561,-0.307961,0.372681,1.036267,0.735501,-0.019753,0.549365,-0.570004,0.642439,0.146908,0.189483,-0.222768,0.764498,-0.016893,-0.758978,-0.189126,-0.510955,0.684121,0.364464,0.00047,-0.585148,0.449317,0.44305,-0.113651,0.433871,-0.280579,-0.472669,0.04573,0.57963,-0.194653,0.448946,-0.347279,-0.43704,-0.690481,0.071805,-0.48868,0.886538,0.029418,0.208392,-0.252676,0.725922,0.491173,0.030124,0.460888,-0.105347,0.394213,0.47592,-0.369894,-0.784372,0.0,0.912006,0.556557,1.939784,0.0,0.0,0.0,3.562583,1.380936,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.556557,0.912006,0.0,0.0,0.0,0.0,0.912006,1.380936,3.474309,0.0,0.0,2.43582,0.0,1.17371,0.556557,0.0,1.826578,0.0,0.0,1.552503,0.0,2.811675,0.0,2.368422,0.0,4.697929,0.0,2.218248,3.241562,1.826578,1.552503,2.855459,2.43582,0.556557,0.556557,0.556557,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.89823,0.556557,1.380936,0.0,0.0,2.218248,0.556557,4.582978,4.444368,4.12813,4.381195,3.181561,3.241562,0.912006,0.912006,0.556557,0.556557,0.0,0.0,1.826578,4.677358,4.426723,0.0,BP
1,47ca10428867,94.340584,-0.769871,15.549874,-4.786283,2.480034,-6.995259,-2.822901,-8.582637,3.840781,-0.807404,-3.455627,3.600092,1.450024,-0.015855,-2.458287,-3.777133,-0.728222,1.49953,0.015311,-4.061447,4.792212,-2.743153,-1.840634,-1.344559,-3.111304,2.621312,-0.54648,0.128044,-0.201176,0.312849,-0.456893,1.007501,1.966879,-2.153435,-1.236874,1.929713,-2.381524,2.449579,-1.149438,0.33349,0.823237,-0.586385,-0.36761,-0.061161,0.174256,-0.507963,-0.763498,0.908762,-1.763034,-0.251752,-0.607767,0.726292,0.048975,1.407672,2.070197,-0.212705,0.494788,-0.724998,0.904063,-1.497725,0.231481,-1.26625,-0.167352,-1.604437,0.925232,-0.065531,-1.113606,-0.821714,-1.955168,0.668757,-0.027131,0.903716,0.432536,0.059465,-0.345393,-1.167797,0.26132,-0.284043,-1.154144,0.682826,0.641931,-1.254215,1.251407,-0.184482,0.92174,0.693242,0.46497,0.331271,0.531995,0.497475,-0.866314,0.719519,1.021825,0.132196,-0.241124,-0.187839,-0.490108,0.45458,-0.403318,0.783202,0.018361,-0.375637,0.780749,0.098329,0.59736,-0.273199,-0.263744,0.464435,0.361842,-0.145227,0.542725,-0.17402,-1.038662,-0.007445,-0.213407,-0.124744,-0.892308,-0.845848,-0.612208,-0.223398,-0.086347,0.206546,0.194392,-0.034826,-0.020707,0.011323,0.420549,0.553875,0.0,1.074065,0.0,1.579908,0.826294,1.074065,0.826294,2.36427,0.826294,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.826294,0.49617,0.0,0.0,0.0,1.074065,1.074065,1.914237,3.556723,0.0,0.0,2.30197,0.0,1.704189,0.0,0.0,2.422915,1.272474,2.004742,2.478311,0.0,2.628167,0.0,2.004742,1.272474,4.807433,0.0,1.914237,3.100083,1.074065,0.49617,2.716891,1.814718,0.826294,0.0,0.0,0.0,0.0,0.0,0.49617,0.0,0.0,0.826294,6.087715,0.49617,0.49617,0.0,1.437959,2.23553,0.49617,4.390241,3.8139,3.661063,4.253925,3.500165,3.755537,0.49617,1.437959,1.074065,0.49617,0.0,0.0,2.23553,4.436921,4.332875,2.873727,BP
2,eb55c318a3f5,90.744911,2.568184,15.610394,-1.628029,2.25786,-4.922578,-5.242809,-3.748435,6.182809,-1.121708,-4.858407,2.300344,-1.922307,-2.304065,-2.716464,0.298471,-4.17924,0.63192,0.070518,-5.457357,5.616451,-0.054896,-3.531305,0.496471,-1.858853,3.120521,0.148297,1.100644,0.988143,-0.69729,-2.228507,2.235383,-0.025038,-1.018935,-1.213947,1.299989,-2.08576,0.733206,-0.682527,-0.3117,-0.064224,0.019636,0.718056,0.592136,0.715152,0.344199,0.861016,1.229928,-0.912822,-0.15952,-1.186366,-0.840035,-0.072155,1.878896,1.773269,1.822688,0.681435,0.459624,0.869422,-0.119254,1.242553,-1.019596,0.826894,-0.052455,1.354964,0.217741,-0.361909,-0.655758,-0.295328,-0.875897,-0.746506,0.074447,-0.592774,-0.42777,0.769433,-0.431271,0.279094,0.735341,-1.818038,0.699103,1.310451,-0.199613,0.142453,0.842764,0.575169,1.29971,0.27952,-0.094999,0.137312,0.284034,0.277164,0.723835,0.56353,-0.367108,1.333554,-0.688971,0.238771,1.068376,-0.50148,1.185643,0.414546,-1.865954,0.248904,-1.646006,0.262938,-0.364953,0.670893,0.580079,0.368139,0.006508,0.479978,-0.722534,-0.04448,-0.750409,0.325107,-0.385203,0.085095,0.203746,-0.398072,0.493465,-1.600592,-0.36611,0.807967,0.025352,-0.658579,0.784272,0.846943,0.098148,0.0,1.420626,0.487317,0.0,0.487317,0.0,0.0,3.332258,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.059118,0.0,0.0,0.0,0.813542,0.813542,0.0,1.561854,3.972538,0.0,0.0,1.985051,0.0,1.561854,0.0,0.0,2.2152,0.487317,1.795674,2.067794,0.0,3.048995,0.0,2.144211,0.487317,4.501672,0.0,2.50984,2.815322,1.985051,0.0,2.144211,1.685583,0.813542,0.0,1.795674,0.0,0.0,0.0,1.059118,0.0,0.0,0.0,5.919199,0.0,1.059118,0.0,0.813542,1.059118,1.561854,4.429486,3.948611,3.948611,4.594705,4.007388,3.687188,0.487317,0.813542,1.059118,0.0,0.0,0.0,2.50984,4.335368,4.494683,3.309577,BP
3,950cb0df05be,68.790382,18.684298,-10.684662,5.680105,8.930925,-9.014524,-1.43025,-1.283236,-2.258491,4.643044,-0.806318,-1.055662,-1.124793,-2.776042,0.03716,-0.290159,-3.95633,-1.448408,-2.758415,-3.568746,-1.0732,-2.844688,-1.49953,1.132815,-1.000769,6.886518,2.41902,-2.253138,0.843889,-1.754883,-1.014424,0.020943,1.979688,3.522782,-2.307371,3.031943,0.709029,-0.21236,1.604567,-3.279294,-3.457439,1.754871,1.184195,-0.259222,2.55496,-1.714911,-1.069957,-0.356863,2.560873,-2.885826,2.574897,-0.130087,1.581909,-0.727403,1.484432,-1.977327,-1.749054,0.762724,0.821048,-0.191419,-1.729314,-0.950804,0.428812,0.293727,-0.220505,-2.623966,-0.272725,0.315644,1.438619,-1.311449,-1.850295,-1.518388,-1.905962,-0.729698,-0.165599,-0.109231,-0.294461,-0.764231,0.197521,-0.710611,-0.609209,1.337496,1.103688,0.268627,1.55987,0.621826,3.80806,0.043963,0.399859,0.277893,-1.027664,-0.216087,1.273526,-0.191977,0.209027,0.612342,-0.504782,-1.763399,0.993363,0.024855,1.348296,-0.619985,-0.960505,-0.37365,-1.459227,-0.165293,-2.415163,0.523257,0.297491,-1.304865,-0.913415,0.417705,0.177478,-1.832481,-0.260322,1.004527,-0.978374,-0.978094,-0.003875,0.125939,-0.858825,-0.610291,0.043005,-0.296316,-1.204168,0.058411,-0.468721,0.058747,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.21962,1.574641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.358258,1.908581,1.574641,1.574641,0.0,0.0,3.391738,1.574641,0.0,0.0,0.0,2.52469,1.0697,0.0,1.0697,0.0,2.903098,0.0,1.574641,0.0,3.512934,3.093745,2.358258,2.52469,3.176972,2.358258,4.034606,1.908581,1.0697,0.0,2.358258,2.358258,2.158485,0.0,0.0,0.0,0.0,0.0,4.718867,0.0,0.0,0.0,0.0,2.903098,0.0,4.891107,4.445449,4.445449,4.784982,3.391738,1.574641,1.574641,2.158485,0.0,1.908581,1.908581,1.0697,1.0697,4.684094,4.987041,3.568435,BP
4,576b8516f44d,92.254715,-3.28381,13.500619,0.032904,-1.250571,-2.333945,-1.900685,-0.24635,0.112816,-1.975811,-5.636484,0.421511,-0.605844,4.44716,0.86986,-0.550896,-3.929692,0.14523,-2.404824,2.206318,4.622179,-2.141047,0.156299,-1.626004,-1.616588,1.902388,1.129532,0.151217,-0.614746,-2.577579,-1.713998,1.956989,-1.860271,-2.304142,-1.452113,0.879879,-2.048893,0.558831,-2.033007,-1.194119,-0.607052,-1.191567,-0.046202,-0.711389,-0.178526,1.141036,0.45862,2.02328,-0.432963,0.631385,-0.672888,-0.279982,-0.710387,0.030256,-0.7028,0.515051,1.004386,-0.224104,0.597239,-0.440105,0.584873,0.200037,0.673555,-0.833832,1.444417,0.24535,-0.284895,0.596813,-0.631818,0.154358,-0.503611,0.29267,-0.296773,0.81875,1.07381,-0.657518,0.456929,-0.945964,-0.154484,-0.205553,0.758927,-1.079701,0.980611,1.268871,0.110668,1.287416,0.26147,-0.402037,-1.21485,0.477963,-1.071552,-0.732076,0.263999,0.938638,-0.338256,0.216482,0.119252,0.469082,-0.766369,0.393222,-0.306972,0.700135,0.391213,-0.417951,-0.506462,-0.477883,0.783351,1.061997,-0.445038,-0.394104,-0.111194,-0.358824,-0.044186,-0.099476,0.491066,0.72761,0.348112,0.484685,-0.168766,0.418561,0.420111,0.311917,0.538588,0.097655,-0.086082,0.526645,-0.582222,-0.550286,0.0,1.807271,0.0,1.728238,0.0,0.0,0.0,2.818068,0.65552,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.380539,0.0,0.0,0.380539,0.0,0.65552,0.65552,1.642418,3.674654,0.65552,0.0,1.880513,0.0,2.012636,0.0,0.0,1.642418,0.870966,0.870966,2.282217,0.0,2.531855,0.0,2.012636,1.198567,4.539185,0.0,2.282217,2.922987,1.444919,0.65552,2.414805,1.444919,0.380539,0.0,0.65552,0.0,1.948755,0.0,1.048122,0.0,0.0,0.0,6.018153,0.870966,1.548536,0.0,0.0,2.282217,0.0,4.136584,3.856663,3.816722,4.615355,4.261608,3.359832,0.0,0.870966,0.380539,0.870966,0.0,0.0,2.669247,4.840184,4.248464,1.198567,BP


In [39]:
# Gene identifiers used to rename the imp_* feature columns:
# the entry at position i is the replacement name for column 'imp_<i>'.
# 84 entries, one per line, in the same order as the imp_ indices.
gene_ids = [
    'ENSG00000075340_ADD2',
    'ENSG00000233968_AL157895.1',
    'ENSG00000029534_ANK1',
    'ENSG00000135046_ANXA1',
    'ENSG00000130208_APOC1',
    'ENSG00000047648_ARHGAP6',
    'ENSG00000101200_AVP',
    'ENSG00000166710_B2M',
    'ENSG00000130303_BST2',
    'ENSG00000172247_C1QTNF4',
    'ENSG00000170458_CD14',
    'ENSG00000134061_CD180',
    'ENSG00000177455_CD19',
    'ENSG00000116824_CD2',
    'ENSG00000206531_CD200R1L',
    'ENSG00000012124_CD22',
    'ENSG00000272398_CD24',
    'ENSG00000139193_CD27',
    'ENSG00000105383_CD33',
    'ENSG00000174059_CD34',
    'ENSG00000135218_CD36',
    'ENSG00000004468_CD38',
    'ENSG00000010610_CD4',
    'ENSG00000026508_CD44',
    'ENSG00000117091_CD48',
    'ENSG00000169442_CD52',
    'ENSG00000135404_CD63',
    'ENSG00000173762_CD7',
    'ENSG00000137101_CD72',
    'ENSG00000019582_CD74',
    'ENSG00000105369_CD79A',
    'ENSG00000085117_CD82',
    'ENSG00000114013_CD86',
    'ENSG00000010278_CD9',
    'ENSG00000002586_CD99',
    'ENSG00000166091_CMTM5',
    'ENSG00000119865_CNRIP1',
    'ENSG00000100368_CSF2RB',
    'ENSG00000100448_CTSG',
    'ENSG00000051523_CYBA',
    'ENSG00000116675_DNAJC6',
    'ENSG00000142227_EMP3',
    'ENSG00000143226_FCGR2A',
    'ENSG00000167996_FTH1',
    'ENSG00000139278_GLIPR1',
    'ENSG00000130755_GMFG',
    'ENSG00000169567_HINT1',
    'ENSG00000206503_HLA-A',
    'ENSG00000234745_HLA-B',
    'ENSG00000204287_HLA-DRA',
    'ENSG00000196126_HLA-DRB1',
    'ENSG00000204592_HLA-E',
    'ENSG00000171476_HOPX',
    'ENSG00000076662_ICAM3',
    'ENSG00000163565_IFI16',
    'ENSG00000142089_IFITM3',
    'ENSG00000160593_JAML',
    'ENSG00000055118_KCNH2',
    'ENSG00000105610_KLF1',
    'ENSG00000139187_KLRG1',
    'ENSG00000133816_MICAL2',
    'ENSG00000198938_MT-CO3',
    'ENSG00000107130_NCS1',
    'ENSG00000090470_PDCD7',
    'ENSG00000143627_PKLR',
    'ENSG00000109099_PMP22',
    'ENSG00000117450_PRDX1',
    'ENSG00000112077_RHAG',
    'ENSG00000108107_RPL28',
    'ENSG00000198918_RPL39',
    'ENSG00000145425_RPS3A',
    'ENSG00000198034_RPS4X',
    'ENSG00000196154_S100A4',
    'ENSG00000197956_S100A6',
    'ENSG00000188404_SELL',
    'ENSG00000124570_SERPINB6',
    'ENSG00000235169_SMIM1',
    'ENSG00000095932_SMIM24',
    'ENSG00000137642_SORL1',
    'ENSG00000128040_SPINK2',
    'ENSG00000072274_TFRC',
    'ENSG00000205542_TMSB4X',
    'ENSG00000133112_TPT1',
    'ENSG00000026025_VIM',
]

In [129]:
# Build the replacement header in one pass: a label of the form 'imp_<i>'
# becomes gene_ids[i]; every other label is carried over unchanged.
new_columns = [
    gene_ids[int(label.split('_')[1])] if label.startswith('imp_') else label
    for label in X_test_shap.columns
]
# Assigning to .columns relabels the existing frame in place.
X_test_shap.columns = new_columns
print(X_test_shap.shape)
X_test_shap.head()

(35, 214)


Unnamed: 0,cell_id,base_svd_0,base_svd_1,base_svd_2,base_svd_3,base_svd_4,base_svd_5,base_svd_6,base_svd_7,base_svd_8,base_svd_9,base_svd_10,base_svd_11,base_svd_12,base_svd_13,base_svd_14,base_svd_15,base_svd_16,base_svd_17,base_svd_18,base_svd_19,base_svd_20,base_svd_21,base_svd_22,base_svd_23,base_svd_24,base_svd_25,base_svd_26,base_svd_27,base_svd_28,base_svd_29,base_svd_30,base_svd_31,base_svd_32,base_svd_33,base_svd_34,base_svd_35,base_svd_36,base_svd_37,base_svd_38,base_svd_39,base_svd_40,base_svd_41,base_svd_42,base_svd_43,base_svd_44,base_svd_45,base_svd_46,base_svd_47,base_svd_48,base_svd_49,base_svd_50,base_svd_51,base_svd_52,base_svd_53,base_svd_54,base_svd_55,base_svd_56,base_svd_57,base_svd_58,base_svd_59,base_svd_60,base_svd_61,base_svd_62,base_svd_63,base_svd_64,base_svd_65,base_svd_66,base_svd_67,base_svd_68,base_svd_69,base_svd_70,base_svd_71,base_svd_72,base_svd_73,base_svd_74,base_svd_75,base_svd_76,base_svd_77,base_svd_78,base_svd_79,base_svd_80,base_svd_81,base_svd_82,base_svd_83,base_svd_84,base_svd_85,base_svd_86,base_svd_87,base_svd_88,base_svd_89,base_svd_90,base_svd_91,base_svd_92,base_svd_93,base_svd_94,base_svd_95,base_svd_96,base_svd_97,base_svd_98,base_svd_99,base_svd_100,base_svd_101,base_svd_102,base_svd_103,base_svd_104,base_svd_105,base_svd_106,base_svd_107,base_svd_108,base_svd_109,base_svd_110,base_svd_111,base_svd_112,base_svd_113,base_svd_114,base_svd_115,base_svd_116,base_svd_117,base_svd_118,base_svd_119,base_svd_120,base_svd_121,base_svd_122,base_svd_123,base_svd_124,base_svd_125,base_svd_126,base_svd_127,ENSG00000075340_ADD2,ENSG00000233968_AL157895.1,ENSG00000029534_ANK1,ENSG00000135046_ANXA1,ENSG00000130208_APOC1,ENSG00000047648_ARHGAP6,ENSG00000101200_AVP,ENSG00000166710_B2M,ENSG00000130303_BST2,ENSG00000172247_C1QTNF4,ENSG00000170458_CD14,ENSG00000134061_CD180,ENSG00000177455_CD19,ENSG00000116824_CD2,ENSG00000206531_CD200R1L,ENSG00000012124_CD22,ENSG00000272398_CD24,ENSG00000139193_CD27,ENSG00000105383_CD33,ENSG00000174
059_CD34,ENSG00000135218_CD36,ENSG00000004468_CD38,ENSG00000010610_CD4,ENSG00000026508_CD44,ENSG00000117091_CD48,ENSG00000169442_CD52,ENSG00000135404_CD63,ENSG00000173762_CD7,ENSG00000137101_CD72,ENSG00000019582_CD74,ENSG00000105369_CD79A,ENSG00000085117_CD82,ENSG00000114013_CD86,ENSG00000010278_CD9,ENSG00000002586_CD99,ENSG00000166091_CMTM5,ENSG00000119865_CNRIP1,ENSG00000100368_CSF2RB,ENSG00000100448_CTSG,ENSG00000051523_CYBA,ENSG00000116675_DNAJC6,ENSG00000142227_EMP3,ENSG00000143226_FCGR2A,ENSG00000167996_FTH1,ENSG00000139278_GLIPR1,ENSG00000130755_GMFG,ENSG00000169567_HINT1,ENSG00000206503_HLA-A,ENSG00000234745_HLA-B,ENSG00000204287_HLA-DRA,ENSG00000196126_HLA-DRB1,ENSG00000204592_HLA-E,ENSG00000171476_HOPX,ENSG00000076662_ICAM3,ENSG00000163565_IFI16,ENSG00000142089_IFITM3,ENSG00000160593_JAML,ENSG00000055118_KCNH2,ENSG00000105610_KLF1,ENSG00000139187_KLRG1,ENSG00000133816_MICAL2,ENSG00000198938_MT-CO3,ENSG00000107130_NCS1,ENSG00000090470_PDCD7,ENSG00000143627_PKLR,ENSG00000109099_PMP22,ENSG00000117450_PRDX1,ENSG00000112077_RHAG,ENSG00000108107_RPL28,ENSG00000198918_RPL39,ENSG00000145425_RPS3A,ENSG00000198034_RPS4X,ENSG00000196154_S100A4,ENSG00000197956_S100A6,ENSG00000188404_SELL,ENSG00000124570_SERPINB6,ENSG00000235169_SMIM1,ENSG00000095932_SMIM24,ENSG00000137642_SORL1,ENSG00000128040_SPINK2,ENSG00000072274_TFRC,ENSG00000205542_TMSB4X,ENSG00000133112_TPT1,ENSG00000026025_VIM,cell_type
0,6394c11656fe,91.398605,-2.573413,5.897174,0.835887,-4.996771,2.661109,-1.900818,-4.152645,1.443682,-3.602244,-1.747702,3.906802,1.733961,-2.119727,-1.108597,-3.133109,-4.540565,1.575497,-0.689293,-2.578106,2.50742,-1.177895,-1.675395,-1.366361,-1.123123,1.371597,1.645501,1.258302,-0.117805,-0.994187,-0.609635,1.246436,3.17098,-2.225565,-0.601513,-0.493538,-1.269298,0.059885,-1.085594,-0.917217,-0.597429,-0.815437,-0.426063,-1.677097,0.761961,-0.078423,-0.31013,0.488922,-0.610098,-0.611349,0.95728,-0.177487,1.554586,-0.544638,1.78608,0.542536,-0.162515,-1.671796,0.265801,-0.201104,0.867177,0.172407,-0.740333,-0.255071,0.307698,-1.323099,0.102038,-1.199427,-0.751893,-0.150644,-0.102823,0.159292,-0.199846,0.736028,-0.043564,-1.187134,-0.230672,-0.341662,-1.036231,0.625561,-0.307961,0.372681,1.036267,0.735501,-0.019753,0.549365,-0.570004,0.642439,0.146908,0.189483,-0.222768,0.764498,-0.016893,-0.758978,-0.189126,-0.510955,0.684121,0.364464,0.00047,-0.585148,0.449317,0.44305,-0.113651,0.433871,-0.280579,-0.472669,0.04573,0.57963,-0.194653,0.448946,-0.347279,-0.43704,-0.690481,0.071805,-0.48868,0.886538,0.029418,0.208392,-0.252676,0.725922,0.491173,0.030124,0.460888,-0.105347,0.394213,0.47592,-0.369894,-0.784372,0.0,0.912006,0.556557,1.939784,0.0,0.0,0.0,3.562583,1.380936,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.556557,0.912006,0.0,0.0,0.0,0.0,0.912006,1.380936,3.474309,0.0,0.0,2.43582,0.0,1.17371,0.556557,0.0,1.826578,0.0,0.0,1.552503,0.0,2.811675,0.0,2.368422,0.0,4.697929,0.0,2.218248,3.241562,1.826578,1.552503,2.855459,2.43582,0.556557,0.556557,0.556557,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.89823,0.556557,1.380936,0.0,0.0,2.218248,0.556557,4.582978,4.444368,4.12813,4.381195,3.181561,3.241562,0.912006,0.912006,0.556557,0.556557,0.0,0.0,1.826578,4.677358,4.426723,0.0,BP
1,47ca10428867,94.340584,-0.769871,15.549874,-4.786283,2.480034,-6.995259,-2.822901,-8.582637,3.840781,-0.807404,-3.455627,3.600092,1.450024,-0.015855,-2.458287,-3.777133,-0.728222,1.49953,0.015311,-4.061447,4.792212,-2.743153,-1.840634,-1.344559,-3.111304,2.621312,-0.54648,0.128044,-0.201176,0.312849,-0.456893,1.007501,1.966879,-2.153435,-1.236874,1.929713,-2.381524,2.449579,-1.149438,0.33349,0.823237,-0.586385,-0.36761,-0.061161,0.174256,-0.507963,-0.763498,0.908762,-1.763034,-0.251752,-0.607767,0.726292,0.048975,1.407672,2.070197,-0.212705,0.494788,-0.724998,0.904063,-1.497725,0.231481,-1.26625,-0.167352,-1.604437,0.925232,-0.065531,-1.113606,-0.821714,-1.955168,0.668757,-0.027131,0.903716,0.432536,0.059465,-0.345393,-1.167797,0.26132,-0.284043,-1.154144,0.682826,0.641931,-1.254215,1.251407,-0.184482,0.92174,0.693242,0.46497,0.331271,0.531995,0.497475,-0.866314,0.719519,1.021825,0.132196,-0.241124,-0.187839,-0.490108,0.45458,-0.403318,0.783202,0.018361,-0.375637,0.780749,0.098329,0.59736,-0.273199,-0.263744,0.464435,0.361842,-0.145227,0.542725,-0.17402,-1.038662,-0.007445,-0.213407,-0.124744,-0.892308,-0.845848,-0.612208,-0.223398,-0.086347,0.206546,0.194392,-0.034826,-0.020707,0.011323,0.420549,0.553875,0.0,1.074065,0.0,1.579908,0.826294,1.074065,0.826294,2.36427,0.826294,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.826294,0.49617,0.0,0.0,0.0,1.074065,1.074065,1.914237,3.556723,0.0,0.0,2.30197,0.0,1.704189,0.0,0.0,2.422915,1.272474,2.004742,2.478311,0.0,2.628167,0.0,2.004742,1.272474,4.807433,0.0,1.914237,3.100083,1.074065,0.49617,2.716891,1.814718,0.826294,0.0,0.0,0.0,0.0,0.0,0.49617,0.0,0.0,0.826294,6.087715,0.49617,0.49617,0.0,1.437959,2.23553,0.49617,4.390241,3.8139,3.661063,4.253925,3.500165,3.755537,0.49617,1.437959,1.074065,0.49617,0.0,0.0,2.23553,4.436921,4.332875,2.873727,BP
2,eb55c318a3f5,90.744911,2.568184,15.610394,-1.628029,2.25786,-4.922578,-5.242809,-3.748435,6.182809,-1.121708,-4.858407,2.300344,-1.922307,-2.304065,-2.716464,0.298471,-4.17924,0.63192,0.070518,-5.457357,5.616451,-0.054896,-3.531305,0.496471,-1.858853,3.120521,0.148297,1.100644,0.988143,-0.69729,-2.228507,2.235383,-0.025038,-1.018935,-1.213947,1.299989,-2.08576,0.733206,-0.682527,-0.3117,-0.064224,0.019636,0.718056,0.592136,0.715152,0.344199,0.861016,1.229928,-0.912822,-0.15952,-1.186366,-0.840035,-0.072155,1.878896,1.773269,1.822688,0.681435,0.459624,0.869422,-0.119254,1.242553,-1.019596,0.826894,-0.052455,1.354964,0.217741,-0.361909,-0.655758,-0.295328,-0.875897,-0.746506,0.074447,-0.592774,-0.42777,0.769433,-0.431271,0.279094,0.735341,-1.818038,0.699103,1.310451,-0.199613,0.142453,0.842764,0.575169,1.29971,0.27952,-0.094999,0.137312,0.284034,0.277164,0.723835,0.56353,-0.367108,1.333554,-0.688971,0.238771,1.068376,-0.50148,1.185643,0.414546,-1.865954,0.248904,-1.646006,0.262938,-0.364953,0.670893,0.580079,0.368139,0.006508,0.479978,-0.722534,-0.04448,-0.750409,0.325107,-0.385203,0.085095,0.203746,-0.398072,0.493465,-1.600592,-0.36611,0.807967,0.025352,-0.658579,0.784272,0.846943,0.098148,0.0,1.420626,0.487317,0.0,0.487317,0.0,0.0,3.332258,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.059118,0.0,0.0,0.0,0.813542,0.813542,0.0,1.561854,3.972538,0.0,0.0,1.985051,0.0,1.561854,0.0,0.0,2.2152,0.487317,1.795674,2.067794,0.0,3.048995,0.0,2.144211,0.487317,4.501672,0.0,2.50984,2.815322,1.985051,0.0,2.144211,1.685583,0.813542,0.0,1.795674,0.0,0.0,0.0,1.059118,0.0,0.0,0.0,5.919199,0.0,1.059118,0.0,0.813542,1.059118,1.561854,4.429486,3.948611,3.948611,4.594705,4.007388,3.687188,0.487317,0.813542,1.059118,0.0,0.0,0.0,2.50984,4.335368,4.494683,3.309577,BP
3,950cb0df05be,68.790382,18.684298,-10.684662,5.680105,8.930925,-9.014524,-1.43025,-1.283236,-2.258491,4.643044,-0.806318,-1.055662,-1.124793,-2.776042,0.03716,-0.290159,-3.95633,-1.448408,-2.758415,-3.568746,-1.0732,-2.844688,-1.49953,1.132815,-1.000769,6.886518,2.41902,-2.253138,0.843889,-1.754883,-1.014424,0.020943,1.979688,3.522782,-2.307371,3.031943,0.709029,-0.21236,1.604567,-3.279294,-3.457439,1.754871,1.184195,-0.259222,2.55496,-1.714911,-1.069957,-0.356863,2.560873,-2.885826,2.574897,-0.130087,1.581909,-0.727403,1.484432,-1.977327,-1.749054,0.762724,0.821048,-0.191419,-1.729314,-0.950804,0.428812,0.293727,-0.220505,-2.623966,-0.272725,0.315644,1.438619,-1.311449,-1.850295,-1.518388,-1.905962,-0.729698,-0.165599,-0.109231,-0.294461,-0.764231,0.197521,-0.710611,-0.609209,1.337496,1.103688,0.268627,1.55987,0.621826,3.80806,0.043963,0.399859,0.277893,-1.027664,-0.216087,1.273526,-0.191977,0.209027,0.612342,-0.504782,-1.763399,0.993363,0.024855,1.348296,-0.619985,-0.960505,-0.37365,-1.459227,-0.165293,-2.415163,0.523257,0.297491,-1.304865,-0.913415,0.417705,0.177478,-1.832481,-0.260322,1.004527,-0.978374,-0.978094,-0.003875,0.125939,-0.858825,-0.610291,0.043005,-0.296316,-1.204168,0.058411,-0.468721,0.058747,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.21962,1.574641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.358258,1.908581,1.574641,1.574641,0.0,0.0,3.391738,1.574641,0.0,0.0,0.0,2.52469,1.0697,0.0,1.0697,0.0,2.903098,0.0,1.574641,0.0,3.512934,3.093745,2.358258,2.52469,3.176972,2.358258,4.034606,1.908581,1.0697,0.0,2.358258,2.358258,2.158485,0.0,0.0,0.0,0.0,0.0,4.718867,0.0,0.0,0.0,0.0,2.903098,0.0,4.891107,4.445449,4.445449,4.784982,3.391738,1.574641,1.574641,2.158485,0.0,1.908581,1.908581,1.0697,1.0697,4.684094,4.987041,3.568435,BP
4,576b8516f44d,92.254715,-3.28381,13.500619,0.032904,-1.250571,-2.333945,-1.900685,-0.24635,0.112816,-1.975811,-5.636484,0.421511,-0.605844,4.44716,0.86986,-0.550896,-3.929692,0.14523,-2.404824,2.206318,4.622179,-2.141047,0.156299,-1.626004,-1.616588,1.902388,1.129532,0.151217,-0.614746,-2.577579,-1.713998,1.956989,-1.860271,-2.304142,-1.452113,0.879879,-2.048893,0.558831,-2.033007,-1.194119,-0.607052,-1.191567,-0.046202,-0.711389,-0.178526,1.141036,0.45862,2.02328,-0.432963,0.631385,-0.672888,-0.279982,-0.710387,0.030256,-0.7028,0.515051,1.004386,-0.224104,0.597239,-0.440105,0.584873,0.200037,0.673555,-0.833832,1.444417,0.24535,-0.284895,0.596813,-0.631818,0.154358,-0.503611,0.29267,-0.296773,0.81875,1.07381,-0.657518,0.456929,-0.945964,-0.154484,-0.205553,0.758927,-1.079701,0.980611,1.268871,0.110668,1.287416,0.26147,-0.402037,-1.21485,0.477963,-1.071552,-0.732076,0.263999,0.938638,-0.338256,0.216482,0.119252,0.469082,-0.766369,0.393222,-0.306972,0.700135,0.391213,-0.417951,-0.506462,-0.477883,0.783351,1.061997,-0.445038,-0.394104,-0.111194,-0.358824,-0.044186,-0.099476,0.491066,0.72761,0.348112,0.484685,-0.168766,0.418561,0.420111,0.311917,0.538588,0.097655,-0.086082,0.526645,-0.582222,-0.550286,0.0,1.807271,0.0,1.728238,0.0,0.0,0.0,2.818068,0.65552,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.380539,0.0,0.0,0.380539,0.0,0.65552,0.65552,1.642418,3.674654,0.65552,0.0,1.880513,0.0,2.012636,0.0,0.0,1.642418,0.870966,0.870966,2.282217,0.0,2.531855,0.0,2.012636,1.198567,4.539185,0.0,2.282217,2.922987,1.444919,0.65552,2.414805,1.444919,0.380539,0.0,0.65552,0.0,1.948755,0.0,1.048122,0.0,0.0,0.0,6.018153,0.870966,1.548536,0.0,0.0,2.282217,0.0,4.136584,3.856663,3.816722,4.615355,4.261608,3.359832,0.0,0.870966,0.380539,0.870966,0.0,0.0,2.669247,4.840184,4.248464,1.198567,BP


In [41]:
# Training features for model #16 live in 'X_svd_128.pickle'; load them and
# convert straight to a NumPy array.
X_train = np.array(pd.read_pickle(cite_feature_path  + 'X_svd_128.pickle'))
print('X_train: ', X_train.shape)
print('X_test: ', X_test.shape)

# KernelExplainer takes the model plus a background dataset; here the
# background is a 1000-row sample of the training features.
shap_background = shap.sample(X_train, 1000)
explainer = shap.KernelExplainer(model, shap_background)
explainer

Using 1000 background data samples could cause slower run times. Consider using shap.sample(data, K) or shap.kmeans(data, K) to summarize the background as K samples.


X_train:  (70988, 212)
X_test:  (26867, 212)


<shap.explainers._kernel.Kernel at 0x7f38a2370940>

In [42]:
# Display the dimensions of private_test_input as the cell output.
private_test_input.shape

(26867, 22085)

In [43]:
# Display the dimensions of private_test_target as the cell output.
private_test_target.shape

(26867, 140)

In [130]:
# xtest is bound to the same DataFrame object as X_test_shap (no copy is made).
# The drop of 'cell_id' / 'cell_type' is commented out, so both columns are
# still present in xtest.
# NOTE(review): confirm downstream code expects these two non-feature columns.
xtest = X_test_shap#.drop(['cell_id', 'cell_type'], axis=1)

In [45]:
# Print the shape of X_test_shap and preview its first rows.
# NOTE(review): the saved output of this cell lists different cell_ids than the
# preview saved right after the column rename, so the two outputs apparently
# come from different sampling runs -- re-run top to bottom to confirm.
print(X_test_shap.shape)
X_test_shap.head()

(35, 214)


Unnamed: 0,cell_id,base_svd_0,base_svd_1,base_svd_2,base_svd_3,base_svd_4,base_svd_5,base_svd_6,base_svd_7,base_svd_8,base_svd_9,base_svd_10,base_svd_11,base_svd_12,base_svd_13,base_svd_14,base_svd_15,base_svd_16,base_svd_17,base_svd_18,base_svd_19,base_svd_20,base_svd_21,base_svd_22,base_svd_23,base_svd_24,base_svd_25,base_svd_26,base_svd_27,base_svd_28,base_svd_29,base_svd_30,base_svd_31,base_svd_32,base_svd_33,base_svd_34,base_svd_35,base_svd_36,base_svd_37,base_svd_38,base_svd_39,base_svd_40,base_svd_41,base_svd_42,base_svd_43,base_svd_44,base_svd_45,base_svd_46,base_svd_47,base_svd_48,base_svd_49,base_svd_50,base_svd_51,base_svd_52,base_svd_53,base_svd_54,base_svd_55,base_svd_56,base_svd_57,base_svd_58,base_svd_59,base_svd_60,base_svd_61,base_svd_62,base_svd_63,base_svd_64,base_svd_65,base_svd_66,base_svd_67,base_svd_68,base_svd_69,base_svd_70,base_svd_71,base_svd_72,base_svd_73,base_svd_74,base_svd_75,base_svd_76,base_svd_77,base_svd_78,base_svd_79,base_svd_80,base_svd_81,base_svd_82,base_svd_83,base_svd_84,base_svd_85,base_svd_86,base_svd_87,base_svd_88,base_svd_89,base_svd_90,base_svd_91,base_svd_92,base_svd_93,base_svd_94,base_svd_95,base_svd_96,base_svd_97,base_svd_98,base_svd_99,base_svd_100,base_svd_101,base_svd_102,base_svd_103,base_svd_104,base_svd_105,base_svd_106,base_svd_107,base_svd_108,base_svd_109,base_svd_110,base_svd_111,base_svd_112,base_svd_113,base_svd_114,base_svd_115,base_svd_116,base_svd_117,base_svd_118,base_svd_119,base_svd_120,base_svd_121,base_svd_122,base_svd_123,base_svd_124,base_svd_125,base_svd_126,base_svd_127,ENSG00000075340_ADD2,ENSG00000233968_AL157895.1,ENSG00000029534_ANK1,ENSG00000135046_ANXA1,ENSG00000130208_APOC1,ENSG00000047648_ARHGAP6,ENSG00000101200_AVP,ENSG00000166710_B2M,ENSG00000130303_BST2,ENSG00000172247_C1QTNF4,ENSG00000170458_CD14,ENSG00000134061_CD180,ENSG00000177455_CD19,ENSG00000116824_CD2,ENSG00000206531_CD200R1L,ENSG00000012124_CD22,ENSG00000272398_CD24,ENSG00000139193_CD27,ENSG00000105383_CD33,ENSG00000174
059_CD34,ENSG00000135218_CD36,ENSG00000004468_CD38,ENSG00000010610_CD4,ENSG00000026508_CD44,ENSG00000117091_CD48,ENSG00000169442_CD52,ENSG00000135404_CD63,ENSG00000173762_CD7,ENSG00000137101_CD72,ENSG00000019582_CD74,ENSG00000105369_CD79A,ENSG00000085117_CD82,ENSG00000114013_CD86,ENSG00000010278_CD9,ENSG00000002586_CD99,ENSG00000166091_CMTM5,ENSG00000119865_CNRIP1,ENSG00000100368_CSF2RB,ENSG00000100448_CTSG,ENSG00000051523_CYBA,ENSG00000116675_DNAJC6,ENSG00000142227_EMP3,ENSG00000143226_FCGR2A,ENSG00000167996_FTH1,ENSG00000139278_GLIPR1,ENSG00000130755_GMFG,ENSG00000169567_HINT1,ENSG00000206503_HLA-A,ENSG00000234745_HLA-B,ENSG00000204287_HLA-DRA,ENSG00000196126_HLA-DRB1,ENSG00000204592_HLA-E,ENSG00000171476_HOPX,ENSG00000076662_ICAM3,ENSG00000163565_IFI16,ENSG00000142089_IFITM3,ENSG00000160593_JAML,ENSG00000055118_KCNH2,ENSG00000105610_KLF1,ENSG00000139187_KLRG1,ENSG00000133816_MICAL2,ENSG00000198938_MT-CO3,ENSG00000107130_NCS1,ENSG00000090470_PDCD7,ENSG00000143627_PKLR,ENSG00000109099_PMP22,ENSG00000117450_PRDX1,ENSG00000112077_RHAG,ENSG00000108107_RPL28,ENSG00000198918_RPL39,ENSG00000145425_RPS3A,ENSG00000198034_RPS4X,ENSG00000196154_S100A4,ENSG00000197956_S100A6,ENSG00000188404_SELL,ENSG00000124570_SERPINB6,ENSG00000235169_SMIM1,ENSG00000095932_SMIM24,ENSG00000137642_SORL1,ENSG00000128040_SPINK2,ENSG00000072274_TFRC,ENSG00000205542_TMSB4X,ENSG00000133112_TPT1,ENSG00000026025_VIM,cell_type
0,46a536d2aba8,92.757668,-8.058743,9.577777,-0.079485,-6.457353,1.782744,1.560264,-2.40932,-5.719316,-0.597724,-3.344079,0.26151,0.712637,-1.025209,-0.820873,2.076658,-0.614023,-2.944788,-1.847399,-2.514676,2.681781,-2.113858,0.041229,1.844948,-3.045417,1.180915,0.486827,0.91594,-1.159498,-0.189862,-1.401075,0.517407,0.519325,-2.193505,0.27978,0.861897,-0.441402,-0.099304,-0.619028,0.276481,1.756663,0.022114,1.02875,0.448088,1.031782,-0.734927,-0.772353,-1.484944,0.622118,1.198598,1.077464,1.185813,2.432313,0.742448,-0.45615,-0.554208,0.281873,-0.699673,-0.123434,-0.976555,0.201653,-0.497153,-0.275681,0.304227,-0.275486,0.556664,0.563214,0.216395,-0.260924,1.422841,-0.151419,0.984403,-0.248037,0.250261,0.153713,-0.600972,0.070738,-0.350203,-0.136995,-0.798399,-0.453775,-1.448212,-0.143278,0.372935,-0.548553,-0.187298,-0.01902,-0.324034,-0.232576,0.888468,1.234241,-0.350593,-0.38574,0.430051,-0.345604,0.173607,1.111932,-0.072923,0.811825,-0.159,0.503684,0.03204,-0.338025,0.046852,0.976891,0.096756,0.129819,0.462481,0.963813,0.051166,-0.469401,0.396085,-0.570683,0.593854,0.383406,0.691101,0.010104,-0.214343,0.659647,0.544503,0.919808,-0.4565,0.36293,-0.58487,0.315288,0.292631,-0.630425,-0.260327,0.0,1.635721,0.0,2.132945,0.602474,0.0,0.0,3.075734,2.390557,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.602474,1.247002,0.602474,0.0,0.0,0.97579,0.0,0.97579,3.398932,0.0,0.0,1.914924,0.0,0.602474,0.0,0.0,2.029864,0.0,0.97579,0.0,0.602474,2.029864,0.0,2.71156,0.602474,4.910748,0.0,2.463531,3.217768,2.311837,1.635721,1.914924,1.247002,1.247002,0.0,0.97579,1.247002,0.0,0.0,0.0,0.0,0.0,0.0,5.912632,0.0,1.247002,0.0,0.0,2.595215,0.0,4.67875,4.242914,4.396709,4.582064,2.765021,3.528278,0.602474,0.97579,0.0,1.247002,0.0,0.0,1.914924,4.143093,4.300594,2.463531,BP
1,fc907e30e1c7,87.748589,2.939809,8.324214,11.730528,-5.312134,2.754924,1.124171,-0.107249,-0.861255,-0.474099,-0.096643,4.139178,-0.785354,0.55657,-1.454497,-0.296469,1.250577,1.856696,-1.429976,-0.224142,4.836112,-4.601017,-1.365693,1.027841,-3.746361,1.173681,-1.064956,0.750331,-1.224923,-1.701862,-2.715031,0.735515,1.126971,-2.102104,-0.034499,1.181116,0.539291,0.491358,-1.497633,0.849895,0.366041,0.004707,-0.328731,0.098814,0.451274,0.474204,-1.695058,0.577827,-1.157057,0.722223,-0.952439,0.052284,-1.102726,0.593379,-0.533272,-1.157565,0.973736,-0.416797,1.440333,-0.971118,0.194018,0.682365,0.141323,0.361523,0.269414,-1.39808,-1.908589,0.457186,-2.384468,1.402816,0.743644,0.613076,-1.177143,-0.040336,0.452729,-0.512667,0.27821,-0.821506,0.04412,1.866801,-0.025607,0.728059,0.477198,0.115625,0.47935,-0.280183,-0.083541,1.57878,1.049939,-0.305622,-2.065483,0.152722,0.960114,-1.191087,0.415119,-0.394231,-0.128811,0.206205,0.054787,-0.180848,-0.644812,0.577997,-0.189572,0.295264,-0.477962,-0.590858,-0.006418,0.670363,0.855791,-1.054547,-0.07656,-0.583436,-0.567577,-0.967555,0.376508,0.396529,0.001735,-0.129443,-0.056476,0.072339,-0.29707,-0.361062,0.001889,-0.560132,0.53726,-0.08744,-0.501599,-0.556385,0.0,1.40562,0.0,1.40562,1.546214,0.0,0.0,3.059419,0.479713,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.479713,0.0,0.0,0.80255,1.40562,2.491634,3.533482,0.0,0.0,1.40562,0.0,1.40562,0.0,0.0,1.669456,0.479713,0.80255,1.546214,0.479713,2.325732,0.0,2.491634,0.479713,4.660169,0.0,2.384145,3.059419,0.0,0.0,1.40562,1.779162,0.479713,0.479713,1.046207,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.74263,0.0,0.0,0.0,0.0,2.588683,0.0,4.432096,3.964831,3.892181,4.202756,3.419343,3.496865,0.0,0.479713,0.479713,0.0,0.0,0.0,1.241975,5.500368,4.363952,3.496865,BP
2,6c3abb20aa12,73.59713,13.867376,-9.224725,7.674225,4.108009,-5.88492,0.485238,-1.126025,-1.185959,7.279289,-0.796318,-0.934948,-1.275834,-3.800754,1.857038,-1.621768,-5.493173,-0.198597,-3.395124,1.076029,-0.018951,-1.978422,-0.872213,-0.011921,1.351449,4.90734,1.145118,-1.131107,0.872578,-0.410626,0.308265,-0.31241,1.903793,0.565585,-4.13993,1.839141,2.124812,0.287274,0.800602,-2.362428,-2.396079,-1.096093,2.105348,-0.752943,1.442793,-0.851058,-1.108114,0.249956,1.402438,-1.241134,1.613356,-1.394946,0.717791,-0.541539,-0.08797,-0.526469,-2.997173,-0.612356,1.124466,-0.610287,-0.343533,-0.234219,1.344739,3.173358,1.21479,-2.170266,-0.421518,1.451979,1.21795,-1.638761,-1.556677,-0.863195,-0.227564,0.424786,1.876187,0.033507,-1.644444,-0.175968,0.539582,-1.425307,1.036682,1.803811,0.493029,-0.057035,1.200654,0.552618,0.821476,0.495717,0.628717,0.665055,-0.500265,-1.785992,-0.495594,-1.425306,0.638918,-0.938261,-0.951348,-0.298726,0.592977,-1.04793,1.668349,1.399558,1.041869,0.092368,-0.247543,0.429845,1.197259,-0.695922,0.014523,-0.531563,0.106461,-0.170211,1.114107,0.170809,-1.064307,-0.022621,0.41082,-0.876322,0.862757,0.806608,-0.454176,0.74633,-0.741056,1.135874,-0.204382,-0.20355,0.417339,-0.365351,0.0,0.0,0.0,1.729657,0.0,0.0,0.0,4.39943,1.97199,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.409151,2.166902,0.0,0.0,4.009483,0.0,0.0,0.0,0.0,2.329951,0.0,0.0,0.934617,0.934617,2.593007,0.0,0.0,0.0,4.090288,0.934617,1.729657,2.593007,2.702445,2.890851,3.599051,2.890851,1.729657,1.409151,1.97199,1.729657,0.0,0.934617,0.934617,0.0,0.0,0.0,4.838227,0.0,0.934617,0.0,0.0,2.470104,0.0,5.047442,4.664799,4.39943,4.436715,2.702445,2.593007,0.0,1.729657,0.0,0.0,0.934617,1.409151,0.934617,4.507358,5.261309,2.470104,BP
3,7798cfe1a4a7,91.4748,-5.402732,9.286153,0.100451,-7.680589,1.630755,1.573954,0.823974,-7.290368,1.001204,-6.370655,-3.664388,1.579531,-2.555962,-1.794636,0.33097,-1.119129,1.897741,-2.115047,-1.618493,1.218709,-1.706422,-1.177093,2.369483,-0.433818,1.865745,-0.226942,1.28714,-1.733597,-2.204083,-1.326147,-1.050304,2.477045,-1.746855,1.485645,-1.323803,-0.944849,-1.168669,-1.766646,-0.187135,-0.151067,1.476696,0.68726,-0.2091,2.769048,-0.624996,-1.181354,0.458898,0.681754,0.87294,-0.668754,-0.044318,1.391046,1.629789,-0.567407,-1.300942,0.156621,0.239592,1.867976,-1.353889,0.551916,0.201357,0.282734,0.749688,-0.225121,0.368272,-0.09143,0.146581,-1.450666,0.331622,-1.138813,-0.365217,-0.279051,0.740319,-0.367279,-0.648453,0.627753,-0.1613,-0.166728,-1.704128,0.695958,0.12197,-0.472168,1.390563,0.361814,-0.420197,-0.394136,-0.695618,0.221315,-0.258627,0.798815,0.182001,-0.263613,-0.112622,-0.707776,0.003124,0.456522,0.165681,0.306835,0.47223,-0.551491,0.107302,0.171983,0.753305,-0.240656,-0.668775,0.268039,0.522227,0.136707,-0.429313,0.217079,0.14504,0.078762,0.887923,1.238912,0.584527,-0.756013,0.398311,1.059609,-0.141323,0.201848,-1.408159,0.567326,0.244284,0.425444,0.050346,-0.898539,1.305165,0.0,0.0,0.0,2.004151,0.727532,0.0,0.0,3.323205,0.727532,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.727532,0.0,0.0,0.0,0.0,0.0,0.0,0.727532,3.10903,0.0,0.0,2.627533,0.0,1.848428,0.0,0.0,1.437438,0.0,1.144201,2.363651,0.0,2.896986,0.0,2.896986,1.144201,4.473548,0.0,1.848428,3.323205,2.83612,1.437438,2.771308,2.138861,0.727532,0.0,1.437438,0.727532,0.0,0.0,0.0,0.727532,0.0,0.0,5.624202,1.144201,1.437438,0.0,0.0,2.547069,0.0,4.662086,4.610226,4.54422,4.271343,3.323205,3.937464,0.0,1.437438,0.0,0.727532,0.727532,0.0,2.004151,4.641664,4.577767,2.363651,BP
4,f507be21e97c,91.700363,-6.99156,5.100821,1.828532,-8.717406,5.355539,1.581861,-4.269363,-1.806359,-3.23642,-2.674693,1.029609,1.451005,-0.865789,-1.127601,0.029317,-1.324081,-0.672318,-3.007694,-1.562025,2.013783,-1.612601,-0.880799,2.035047,-2.54401,0.970761,-0.482916,1.573086,-2.69504,-1.348723,-1.118577,2.334144,-0.666091,-1.39859,0.149004,-0.854723,-0.949127,-1.06746,-1.313829,0.445574,-0.429468,-1.331614,0.043371,0.727165,0.822859,0.13808,-0.078525,0.193308,-0.541749,-0.43574,0.543456,-0.505725,-0.402671,-0.827664,0.457341,-0.608877,1.315826,-0.977678,0.514699,0.328119,1.021269,0.284869,-0.127716,0.435849,-0.222716,-0.421455,0.330328,-0.472248,-0.557332,0.194555,-0.384403,-0.489149,-0.657106,0.391989,1.38869,-0.656462,0.501602,-0.381809,-0.377426,-0.445464,0.476105,-0.302533,0.340321,0.470348,-0.83552,-0.064335,-0.277794,-0.470001,-0.834101,-0.230554,0.243059,-0.21539,0.384466,-0.116515,-1.154996,-1.092127,-0.449637,0.565256,0.149007,-0.312232,0.177604,0.601108,-0.014226,-0.170097,-0.312463,-1.123947,-0.446112,0.230012,0.815282,0.438774,-0.640016,0.042797,0.422995,1.54078,0.463713,-0.033818,-0.220272,-0.324705,0.543078,-0.644272,-0.545668,-0.365737,-0.219087,0.028081,0.133752,0.230653,0.164321,0.18362,0.0,0.580923,0.0,2.268467,0.0,0.0,1.59705,3.617213,1.59705,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.945996,0.580923,0.0,0.0,0.0,0.580923,0.0,1.212853,3.505414,0.0,0.580923,1.873926,0.0,1.42329,0.0,0.0,1.42329,0.0,1.988081,1.873926,0.0,3.029757,0.945996,1.988081,0.0,4.534314,0.0,2.719873,3.406002,1.42329,1.42329,2.183451,0.945996,0.580923,0.0,1.42329,0.0,0.0,0.0,0.0,0.580923,0.945996,1.42329,5.972488,0.0,1.873926,0.0,0.0,3.137895,0.0,4.525822,4.286061,4.264147,4.575721,2.666582,2.419474,0.0,0.945996,0.0,0.945996,0.580923,0.0,2.610291,4.427326,4.349061,1.212853,BP


In [46]:
# Serialize the X_test_shap object to 'X_test_shap_16.pkl' (binary pickle file,
# path relative to the notebook's current working directory).
with open('X_test_shap_16.pkl', mode='wb') as f:
    pickle.dump(obj=X_test_shap, file=f)

In [47]:
# Features: gene columns and SVD components -> for OmniPath: genes
# Open question: does the model rely mostly on the gene features or on the SVD features? -> look into this later

In [48]:
# No need to run this cell again: the SHAP values are cached by the np.save call below
# and can be reloaded with np.load('shap_values_16.npy', allow_pickle=True)
# %timeit
# shap_values = explainer.shap_values(xtest, nsamples=300)  #500? 
# print(len(shap_values)) # -> 140 genes
# print(len(shap_values[0])) # -> number of samples in xtest
# print(shap_values[0].shape)

# np.save('shap_values_16.npy', np.array(shap_values, dtype=object), allow_pickle=True)

In [123]:
# Reload the cached SHAP values from disk instead of recomputing them.
shap_values = np.load(
    'shap_values_16.npy',
    allow_pickle=True,  # the (commented-out) np.save call wrote an object-dtype array
).astype(np.float64)  # cast the object entries to a numeric float array

In [50]:
# Display the first element along axis 0 of the loaded SHAP array as the cell output.
shap_values[0]

array([[-0.00125789, -0.0289969 ,  0.19679437, ...,  0.        ,
         0.        ,  0.        ],
       [ 0.        ,  0.03272793,  0.31874283, ...,  0.        ,
        -0.00038133,  0.        ],
       [ 0.        ,  0.01561575,  0.2119803 , ...,  0.        ,
         0.        ,  0.02033714],
       ...,
       [ 0.        ,  0.14269746,  0.        , ...,  0.        ,
         0.        ,  0.00223412],
       [-0.00286447,  0.0253865 ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.        ,  0.13020312,  0.00273106, ...,  0.        ,
         0.        ,  0.        ]])