# Topological Methods for Visualization and Analysis of High Dimensional Single-Cell RNA Sequencing Data
Tongxin Wang    
Department of Computer Science, Indiana University Bloomington     
Travis Johnson    
Department of Biomedical Informatics, Ohio State University    
Jie Zhang    
Department of Medicine, Indiana University School of Medicine    
Kun Huang    
Department of Medicine, Indiana University School of Medicine    
Regenstrief Institute

In [1]:
#!/usr/bin/env python
import numpy as np
from plotly.offline import init_notebook_mode
from read_data import read_csv
from simple_mapper import f_tSNE, f_PCA, f_Isomap, f_LLE, f_SpectralEmbedding
from expr import plot_mapper
from vis_2d import plot_2d

# Initialise Plotly's offline notebook mode before any figure is drawn.
# NOTE(review): per the plotly.offline documentation, connected=True loads
# plotly.js from a CDN rather than embedding it — confirm that network access
# is available wherever this notebook is executed.
init_notebook_mode(connected=True)

##  1. Visualization of melanoma cells

In [2]:
# Melanoma data set (GSE72056): annotated CSV plus the comma-separated
# expression matrix used by the embeddings and Mapper runs below.
data_filename = 'data/GSE72056.csv'
geneExp_filename = 'data/GSE72056_95.txt'

# read_csv options. With label_only = 1 the call returns only the label
# structures (no expression matrix).
# NOTE(review): take_log presumably toggles a log transform — confirm in read_data.
label_only, num_labels, take_log = 1, 3, 0

geneExp = np.loadtxt(geneExp_filename, delimiter=',')
label, label2idx, idx2label = read_csv(data_filename, label_only, num_labels, take_log)

# Label row 1: malignant vs non-malignant.
label_idx = 1
label_vis = label[label_idx, :]
idx2label_vis = idx2label[label_idx]

### Dimensionality reduction techniques

In [3]:
# Settings shared by the 2-D embeddings below.
n_comp = 2
metric = 'correlation'  # passed to t-SNE
n_neigh = 50            # passed to Isomap, LLE and Spectral Embedding
method = 'standard'     # passed to LLE

# Each stanza computes one embedding of geneExp and plots it with the
# currently selected labels.
title = 'tSNE'
vis_data = f_tSNE(geneExp, n_comp, metric)
plot_2d(title, vis_data, label_vis, idx2label_vis)

# The PCA result is kept under its own name instead of overwriting vis_data.
title = 'PCA'
PCA_data = f_PCA(geneExp, n_comp)
plot_2d(title, PCA_data, label_vis, idx2label_vis)

title = 'Isomap'
vis_data = f_Isomap(geneExp, n_comp, n_neigh)
plot_2d(title, vis_data, label_vis, idx2label_vis)

title = 'LLE'
vis_data = f_LLE(geneExp, n_comp, n_neigh, method)
plot_2d(title, vis_data, label_vis, idx2label_vis)

title = 'Spectral Embedding'
vis_data = f_SpectralEmbedding(geneExp, n_comp, n_neigh)
plot_2d(title, vis_data, label_vis, idx2label_vis)

### Mapper

In [4]:
# Mapper settings for a single run. These names are notebook globals that
# later cells reuse.
filter_method, filter_par = 'eccentricity', 'correlation'
cover_method = 'equal_sample'
cls_metric, cls_method = 'correlation', 'single'
nbins, poverlap = [30], [0.5]

title = 'Single linkage for hierarchical clustering'
# The trailing [] sits in the slot where other cells pass color_data,
# i.e. no colour data is supplied for this run.
plot_mapper(
    title, geneExp, label_vis, idx2label_vis,
    filter_method, filter_par, cover_method,
    nbins, poverlap, cls_metric, cls_method, []
)

### Using Mapper with different numbers of bins

In [5]:
# Sweep over cover settings: one Mapper graph per (nbins, poverlap) pair.
# NOTE: this rebinds nbins/poverlap to lists of settings; they must be
# reassigned to a single setting before being passed to plot_mapper again.
nbins = [[10], [30], [50], [100]]
poverlap = [[0.5], [0.5], [0.5], [0.5]]
for nb, p in zip(nbins, poverlap):
    # The title lists the settings of this run, comma-separated.
    title = 'nbins = {}; poverlap = {}'.format(
        ','.join(map(str, nb)), ','.join(map(str, p)))
    plot_mapper(
        title, geneExp, label_vis, idx2label_vis,
        filter_method, filter_par, cover_method,
        nb, p, cls_metric, cls_method, []
    )

## 2. Incorporating GCNA into mapper

### Use eigengene expression profiles to color nodes

In [6]:
# Eigengene expression profiles for the melanoma data. Later cells read
# eigengene k from row k-1 and use the first num_eigenGene rows.
num_eigenGene = 2
eigenGene_file = 'data/GSE72056_95_eigGene.txt'
eigenGene = np.loadtxt(eigenGene_file, delimiter=',')

In [7]:
# Restore the single-run Mapper settings; the bins sweep above left
# nbins/poverlap bound to lists of settings.
filter_method, filter_par = 'eccentricity', 'correlation'
cover_method = 'equal_sample'
nbins, poverlap = [30], [0.5]
cls_metric, cls_method = 'correlation', 'single'

In [8]:
# Draw one Mapper graph per eigengene, passing that eigengene's profile as
# the colour data.
for i in range(num_eigenGene):
    title = 'EigenGene {}'.format(i + 1)
    # Row i of the loaded matrix is 1-D; hand it over as a single-column
    # 2-D array.
    color_data = eigenGene[i, :].reshape(-1, 1)

    plot_mapper(
        title, geneExp, label_vis, idx2label_vis,
        filter_method, filter_par, cover_method,
        nbins, poverlap, cls_metric, cls_method,
        color_data
    )

#### Specific gene sub-modules in eigengene 2

In [9]:
# Sub-module eigengene files, plotted one after another with the same
# Mapper settings; each entry is (plot title, colour-data file).
submodules = [
    ('melanosome', 'data/GSE72056_95_melanosome_eigGene.txt'),
    ('migration', 'data/GSE72056_95_migration_eigGene.txt'),
]
for title, color_filename in submodules:
    color_data = np.loadtxt(color_filename, delimiter=',')
    # Keep the first axis and make the array 2-D (a 1-D load becomes a
    # single column; a 2-D load is unchanged).
    color_data = color_data.reshape(color_data.shape[0], -1)
    plot_mapper(
        title, geneExp, label_vis, idx2label_vis,
        filter_method, filter_par, cover_method,
        nbins, poverlap, cls_metric, cls_method,
        color_data
    )

### Use eigengene expression profiles as filter functions
#### melanoma data set

In [10]:
# Switch the displayed labels to row 2: detailed cell types.
label_idx = 2
label_vis = label[label_idx, :]
idx2label_vis = idx2label[label_idx]

# Reference t-SNE embedding under the new labels.
n_comp = 2
metric = 'correlation'
title = 'tSNE'
vis_data = f_tSNE(geneExp, n_comp, metric)
plot_2d(title, vis_data, label_vis, idx2label_vis)

# Use eigengene values as the filter: with 'pre_compute', filter_par carries
# the values themselves instead of a metric name.
# cover_method, cls_metric and cls_method are not set here; they keep the
# values assigned by earlier cells.
filter_method = 'pre_compute'
nbins, poverlap = [30], [0.5]
for i in range(num_eigenGene):
    title = 'EigenGene {}'.format(i + 1)
    # Row i as a single-column 2-D array.
    filter_par = eigenGene[i, :].reshape(-1, 1)
    plot_mapper(
        title, geneExp, label_vis, idx2label_vis,
        filter_method, filter_par, cover_method,
        nbins, poverlap, cls_metric, cls_method,
        []
    )

# Two filter columns at once (eigengenes 1 and 2); nbins and poverlap each
# get one entry per column.
nbins, poverlap = [10, 10], [0.3, 0.3]
title = 'EigenGene 1 and EigenGene 2'
filter_par = eigenGene[0:2, :].T
plot_mapper(
    title, geneExp, label_vis, idx2label_vis,
    filter_method, filter_par, cover_method,
    nbins, poverlap, cls_metric, cls_method,
    []
)

#### pancreas data set

In [11]:
# Pancreas data set: annotated CSV, comma-separated expression matrix and
# eigengene profiles. This rebinds the globals that held the melanoma data.
data_filename = "data/murano.csv"
geneExp_filename = 'data/murano_90.txt'
eigenGene_file = 'data/murano_90_eigGene.txt'

# read_csv options. With label_only = 1 only the label structures come back.
# NOTE(review): take_log = 1 presumably enables a log transform — confirm in read_data.
label_only, num_labels, take_log = 1, 1, 1

geneExp = np.loadtxt(geneExp_filename, delimiter=',')
label, label2idx, idx2label = read_csv(data_filename, label_only, num_labels, take_log)

# Only one label row is requested, so row 0 is the one displayed.
label_idx = 0
label_vis = label[label_idx, :]
idx2label_vis = idx2label[label_idx]

eigenGene = np.loadtxt(eigenGene_file, delimiter=',')
num_eigenGene = 2

##### Without eigengenes

In [12]:
# Baseline 2-D t-SNE view of the pancreas data, using the correlation metric.
n_comp, metric = 2, 'correlation'
title = 'tSNE'
vis_data = f_tSNE(geneExp, n_comp, metric)
plot_2d(title, vis_data, label_vis, idx2label_vis)

In [13]:
# Mapper on the pancreas data with the eccentricity filter and no colour
# data (empty list in the last slot).
filter_method, filter_par = 'eccentricity', 'correlation'
cover_method = 'equal_sample'
nbins, poverlap = [30], [0.5]
cls_metric, cls_method = 'correlation', 'single'

title = 'mapper using eccentricity as filter function'
plot_mapper(
    title, geneExp, label_vis, idx2label_vis,
    filter_method, filter_par, cover_method,
    nbins, poverlap, cls_metric, cls_method,
    []
)

##### With eigengenes

In [14]:
# Mapper on the pancreas data with a precomputed filter: eigengene 2.
filter_method = 'pre_compute'
cover_method = 'equal_sample'
cls_metric = 'correlation'
nbins, poverlap = [30], [0.5]

# Eigengene 2 is row index 1; pass it as a single-column 2-D array.
filter_par = eigenGene[1, :].reshape(-1, 1)

# Same settings under two linkage methods, for comparison.
for cls_method in ('single', 'complete'):
    title = 'Eigengene 2, linkage: ' + cls_method
    plot_mapper(
        title, geneExp, label_vis, idx2label_vis,
        filter_method, filter_par, cover_method,
        nbins, poverlap, cls_metric, cls_method,
        []
    )

##### Using marker genes to color nodes

In [15]:
# Genes whose expression will be used to colour the Mapper nodes.
marker_genes = ["GCG", "INS", "SST", "PPY", "KRT19", "PRSS1"]

# Mapper settings: precomputed filter, single linkage.
filter_method = 'pre_compute'
cover_method = 'equal_sample'
nbins, poverlap = [30], [0.5]
cls_method, cls_metric = 'single', 'correlation'
eigenGene_idx = 2  # 1-based eigengene number used as the filter

# With label_only = 0, read_csv also returns the expression matrix and the
# gene symbols, which are needed to look up the marker genes.
label_only, num_labels, take_log = 0, 1, 1
geneExp_marker, geneSym_marker, label, label2idx, idx2label = read_csv(
    data_filename, label_only, num_labels, take_log)

# Reload the Mapper input and the eigengenes from their files.
geneExp = np.loadtxt(geneExp_filename, delimiter=',')
eigenGene = np.loadtxt(eigenGene_file, delimiter=',')

# Convert the 1-based eigengene number to a row index and shape that row
# as a single-column 2-D array.
filter_par = eigenGene[eigenGene_idx - 1, :].reshape(-1, 1)

0


In [16]:
# One Mapper graph per marker gene, coloured by that gene's expression.
for m in marker_genes:
    title = m
    # Find the gene's row by its symbol, then make the row 2-D while
    # keeping its first axis.
    marker_row = geneSym_marker.index(m)
    color_data = geneExp_marker[marker_row, :]
    color_data = color_data.reshape(color_data.shape[0], -1)

    plot_mapper(
        title, geneExp, label_vis, idx2label_vis,
        filter_method, filter_par, cover_method,
        nbins, poverlap, cls_metric, cls_method,
        color_data
    )