In [None]:
!pip install activeSVC==4.0.1
!pip install psutil
!pip install h5py

In [None]:
import numpy as np
import time
import random
import os

from sklearn.preprocessing import normalize 
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA

import matplotlib.pyplot as plt
from matplotlib import gridspec

from activeSVC import min_complexity, min_acquisition
import pandas as pd
import h5py
import pickle
import os, psutil
import resource


def text_create(path, name, msg):
    """Pickle ``msg`` into the file ``<path>/<name>.pickle``.

    Parameters
    ----------
    path : str
        Directory that receives the file; it is not created here, so it
        must already exist.
    name : str
        File name without the ``.pickle`` extension.
    msg : object
        Any picklable object.

    Raises
    ------
    OSError
        If the target file cannot be opened for writing.
    """
    full_path = path + "/" + name + '.pickle'
    # The context manager closes the handle even when pickle.dump raises,
    # which the previous open()/close() pair did not guarantee.
    with open(full_path, 'wb') as f:
        pickle.dump(msg, f)

class TimerError(Exception):
    """Signals that a Timer was started or stopped in the wrong state."""

class Timer:
    """Minimal stopwatch built on ``time.perf_counter``.

    Call ``start()``, run the code to be measured, then call ``stop()`` to
    print and obtain the elapsed seconds. The same instance can be reused
    after it has been stopped.
    """

    def __init__(self):
        # None means "not running"; otherwise the perf_counter reading
        # recorded by start().
        self._start_time = None

    def start(self):
        """Begin timing.

        Raises
        ------
        TimerError
            If the timer is already running.
        """
        already_running = self._start_time is not None
        if already_running:
            raise TimerError("Timer is running. Use .stop() to stop it")
        self._start_time = time.perf_counter()

    def stop(self):
        """Stop timing, print the elapsed time and return it.

        Returns
        -------
        float
            Seconds elapsed since the matching ``start()`` call.

        Raises
        ------
        TimerError
            If the timer is not running.
        """
        began = self._start_time
        if began is None:
            raise TimerError("Timer is not running. Use .start() to start it")
        elapsed_time = time.perf_counter() - began
        # Reset so the instance can be started again.
        self._start_time = None
        print("Total run time: {:0.4f} seconds".format(elapsed_time))
        return elapsed_time
        




# Download and Load Brain Data

In [None]:


# Fixed seed so the train/test partition below is the same on every run.
SPLIT_SEED = 0

# Labels: the 'Cluster' column of the CSV (its first column is the index).
df = pd.read_csv('./data/clusters10.csv', index_col=0)
y = df['Cluster'].values
classes = np.unique(y)
keys = classes

# Every file gets its own handle name so it is not silently rebound and can
# be closed explicitly once the h5py datasets read from it are no longer used.
raw_file = h5py.File('./data/1M_neurons_filtered_gene_bc_matrices_h5.h5', 'r')
h5data = raw_file['mm10']
gene_names = h5data['gene_names']
shape = h5data['shape']

# presumably the normalized matrix in cell-major sparse layout — TODO confirm
cell_file = h5py.File('./data/norm_1M_neurons_filtered_gene_bc_matrices_h5.h5', 'r')
h5data = cell_file['mm10']
data_cell = h5data['data']
indices_cell = h5data['indices']
indptr_cell = h5data['indptr']

# presumably the same matrix transposed (gene-major layout) — TODO confirm
gene_file = h5py.File('./data/norm_transpose_1M_neurons_filtered_gene_bc_matrices_h5.h5', 'r')
h5data = gene_file['mm10']
data_gene = h5data['data']
indices_gene = h5data['indices']
indptr_gene = h5data['indptr']

# Gene names are stored as bytes; decode them to str once.
genelist = np.array(gene_names)
gene_name = np.array([raw_name.decode() for raw_name in genelist])

# 80/20 train/test split over the shape[1] items (shape[1] is read from the
# file once and reused).
n_cells = int(shape[1])
n_train = n_cells * 4 // 5
# A seeded NumPy generator replaces random.shuffle: the split is reproducible
# and the permutation is built without a Python-level loop over the array.
idx = np.random.default_rng(SPLIT_SEED).permutation(n_cells)
idx_train = idx[:n_train]
idx_test = idx[n_train:]

# min_complexity

## Define Parameters and Work Space

In [None]:
'''
Parameters
'''
# These four values are forwarded to min_complexity_h5py in the run cell
# (num_features, num_samples, init_samples, balance keywords).
num_features = 50
num_samples=50
init_samples=50
balance=False

# Results of this run are written below results/min_complexity/.
folder='results/min_complexity'
path=folder+'/test_'+str(num_features)+'_'+str(num_samples)

# makedirs creates 'results', folder and path in one call; exist_ok=True makes
# the cell re-runnable without reporting a spurious "failed" message when the
# directories already exist. Genuine errors (e.g. permissions) still raise.
os.makedirs(path, exist_ok=True)
print ("Output directory ready: %s " % path)

## Select Genes and Save Results

In [None]:
if __name__ == '__main__':

    # NOTE(review): min_complexity_h5py is neither imported nor defined in the
    # visible cells (the import cell only brings in min_complexity and
    # min_acquisition) — confirm where it comes from before a fresh-kernel run.
    t=Timer()
    t.start()
    feature_selected, num_samples_list, train_errors,test_errors,train_scores,test_scores,step_times= min_complexity_h5py(
        data_cell,indices_cell,indptr_cell,data_gene,indices_gene,indptr_gene, y, shape,idx_train,idx_test, 
        num_features=num_features,num_samples=num_samples,init_features=1,init_samples=init_samples, balance=balance)
    elapsed_time=t.stop()
    
    # memorys[0]: resident set size of this process right now, bytes -> MiB.
    # memorys[1]: peak resident set size reported by getrusage.
    # NOTE(review): ru_maxrss is in kilobytes on Linux (bytes on macOS), so
    # dividing by 1024**2 does not yield MiB like memorys[0] — confirm the
    # intended unit before comparing the two numbers.
    memorys=[]
    memorys.append(psutil.Process(os.getpid()).memory_info().rss / 1024 ** 2)
    memorys.append(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss/ 1024 ** 2)
    
    # Persist every result as <path>/<name>.pickle.
    # NOTE(review): `a+b` concatenates if these are lists but adds element-wise
    # if they are NumPy arrays — verify against min_complexity_h5py's return types.
    text_create(path,'feature_selected',feature_selected)
    text_create(path,'error',train_errors+test_errors)
    text_create(path,'accuracy',train_scores+test_scores)
    text_create(path,'num_samples_list',num_samples_list)
    # Fixed: the decoded gene-name array is `gene_name`; `gene` was never
    # defined and raised NameError here.
    text_create(path,'genes_name',gene_name[feature_selected])
    text_create(path,'elapsed_time',elapsed_time)
    text_create(path,'memory',memorys)

    # Train/test accuracy curves, saved as acc.pdf.
    plt.figure(figsize=(8,8))
    plt.plot(train_scores,linewidth=2)
    plt.plot(test_scores,linewidth=2)
    plt.legend(['train acc','test acc'],prop = {'size':18})
    plt.xlabel('number of genes',fontdict={'weight':'normal','size': 18})
    plt.ylabel('accuracy',fontdict={'weight':'normal','size': 18})
    plt.tick_params(labelsize=18)
    plt.savefig(path+'/acc.pdf', bbox_inches="tight")

# min_acquisition

## Define Parameters and Work Space

In [None]:
'''
Parameters
'''
# These values are forwarded to min_acquisition_h5py in the run cell
# (num_features, num_samples, init_samples, balance keywords).
num_features = 50
num_samples=1000
init_samples=1000
# Defined here as well so this section does not silently depend on the
# min_complexity parameter cell having been executed first.
balance=False

# Results of this run are written below results/min_acquisition/.
folder='results/min_acquisition'
path=folder+'/test_'+str(num_features)+'_'+str(num_samples)

# makedirs creates 'results', folder and path in one call; exist_ok=True makes
# the cell re-runnable without reporting a spurious "failed" message when the
# directories already exist. Genuine errors (e.g. permissions) still raise.
os.makedirs(path, exist_ok=True)
print ("Output directory ready: %s " % path)

## Select Genes and Save Results

In [None]:
if __name__ == '__main__':

    # NOTE(review): min_acquisition_h5py is neither imported nor defined in the
    # visible cells (the import cell only brings in min_complexity and
    # min_acquisition) — confirm where it comes from before a fresh-kernel run.
    t=Timer()
    t.start()
    feature_selected, num_samples_list, samples_global, train_errors,test_errors,train_scores,test_scores,step_times= min_acquisition_h5py(
        data_cell,indices_cell,indptr_cell,data_gene,indices_gene,indptr_gene, y, shape,idx_train,idx_test,
        num_features=num_features,num_samples=num_samples,init_features=1,init_samples=init_samples,balance=balance)
    elapsed_time=t.stop()
    
    # memorys[0]: resident set size of this process right now, bytes -> MiB.
    # memorys[1]: peak resident set size reported by getrusage.
    # NOTE(review): ru_maxrss is in kilobytes on Linux (bytes on macOS), so
    # dividing by 1024**2 does not yield MiB like memorys[0] — confirm the
    # intended unit before comparing the two numbers.
    memorys=[]
    memorys.append(psutil.Process(os.getpid()).memory_info().rss / 1024 ** 2)
    memorys.append(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss/ 1024 ** 2)
    
    # Persist every result as <path>/<name>.pickle.
    # NOTE(review): `a+b` concatenates if these are lists but adds element-wise
    # if they are NumPy arrays — verify against min_acquisition_h5py's return types.
    text_create(path,'feature_selected',feature_selected)
    text_create(path,'error',train_errors+test_errors)
    text_create(path,'accuracy',train_scores+test_scores)
    text_create(path,'num_samples_list',num_samples_list)
    text_create(path,'samples_global',samples_global)
    # Fixed: the decoded gene-name array is `gene_name`; `gene` was never
    # defined and raised NameError here.
    text_create(path,'genes_name',gene_name[feature_selected])
    text_create(path,'elapsed_time',elapsed_time)
    text_create(path,'memory',memorys)

    # Train/test accuracy curves, saved as acc.pdf.
    plt.figure(figsize=(8,8))
    plt.plot(train_scores,linewidth=2)
    plt.plot(test_scores,linewidth=2)
    plt.legend(['train acc','test acc'],prop = {'size':18})
    plt.xlabel('number of genes',fontdict={'weight':'normal','size': 18})
    plt.ylabel('accuracy',fontdict={'weight':'normal','size': 18})
    plt.tick_params(labelsize=18)
    plt.savefig(path+'/acc.pdf', bbox_inches="tight")

    # num_samples_list plotted against its index, saved as cells.pdf.
    plt.figure(figsize=(8,5))
    plt.plot(num_samples_list,linewidth=5)
    plt.xlabel('number of genes',fontdict={'weight':'normal','size': 18})
    plt.ylabel('number of cells acquired',fontdict={'weight':'normal','size': 18})
    plt.tick_params(labelsize=18)
    plt.savefig(path+'/cells.pdf', bbox_inches="tight")

# Plots

## Sampling

In [None]:
# Random ordering of all shape[1] indices; only the first 30000 entries are
# used for the plots below.
sampling=np.arange(shape[1])
random.shuffle(sampling)

# NOTE(review): index_cell is neither imported nor defined in the visible
# cells — confirm where it comes from before a fresh-kernel run.
matrix=index_cell(data_cell,indices_cell,indptr_cell, shape, sampling[:30000]).toarray()
target=y[sampling[:30000]]
# Save the ordering so the same subset can be recovered later. `path` is
# whatever the most recently executed parameter cell set it to. pickle is
# already imported in the import cell, so it is not re-imported here.
with open(path+'/sampling.pickle','wb') as f:
    pickle.dump(sampling,f)

## T-SNE

In [None]:
# NOTE(review): tsne1 / tsne2 are not defined in any visible cell — they must
# be provided (presumably precomputed t-SNE coordinates indexed like `y`)
# before this cell can run on a fresh kernel.
plot_idx = sampling[:30000]
tick_font = {'weight':'normal','size': 18}

plt.figure(figsize=(9,6))
c_cmap = plt.get_cmap('tab10', len(classes))
# One point per sampled item, coloured by its label in `target`.
plt.scatter(tsne1[plot_idx], tsne2[plot_idx], c=target, s=1, cmap=c_cmap)
# Ticks at 1..len(classes); assumes the labels take those values — TODO confirm.
tick_positions = np.arange(1, len(classes)+1)
cbar = plt.colorbar(ticks=tick_positions)
plt.xticks([])
plt.yticks([])
cbar.ax.set_yticklabels(keys, fontdict=tick_font)
plt.savefig(path+'/tsne.png', dpi=300, facecolor='white', bbox_inches="tight")

## Gene Markers

In [None]:
alpha = 10000
num_col = 10
# Values shown in the panels: log(1 + alpha * matrix).
data_show = np.log1p(matrix*alpha)

size = len(feature_selected)
# Same row count as int(size/num_col)+1, computed once for figure and grid.
n_rows = int(size/num_col) + 1
plt.figure(figsize=(3*num_col, 2.5*n_rows))
gs = gridspec.GridSpec(n_rows, num_col, width_ratios=[7]*num_col)

# The plotted coordinates are the same for every panel, so index them once.
# NOTE(review): tsne1 / tsne2 are not defined in any visible cell — confirm.
panel_x = tsne1[sampling[:30000]]
panel_y = tsne2[sampling[:30000]]
title_font = {'weight':'normal','size': 18}

# One panel per selected feature, coloured by that column of data_show.
for i, f in enumerate(feature_selected):
    ax0 = plt.subplot(gs[i])
    ax0.set_title(gene_name[f], fontdict=title_font)
    plt.scatter(panel_x, panel_y, c=data_show[:,f], s=0.01)
    ax0.set_xticks([])
    ax0.set_yticks([])
plt.savefig(path+'/genes.png', dpi=300, facecolor='white', bbox_inches="tight")


