In [1]:
import numpy as np
import pandas as pd
import sompy
from load_data import *
from util import *

In [2]:
# Input/output locations. `path` ends with '/' because later cells build
# output file names by plain string concatenation (path + 'name.csv').
ref_path = '/media/clo302/3TB_RAID/ref/TCGA_RNASeq/'
path = '/media/clo302/3TB_RAID/data/tcga_embedding/'

# Raw expression matrix from the project's load_raw helper
# (presumably log2-scaled, going by the variable name -- confirm in load_data/util).
rawlog2 = load_raw(ref_path)

# Keep the shape as the cell's final expression so the notebook displays it.
rawlog2.shape

(9544, 20531)

In [3]:
# Only the 2nd and 4th values returned by load_data are kept: `genes` and
# `sids_CN`, which later cells use as row labels for the SOM weight tables.
# The 1st and 3rd return values are discarded.
cn_embedding_files = ['gemb_CN.csv', 'semb_CN.csv']
_, genes, _, sids_CN = load_data(path, cn_embedding_files, return_rn=True)

(20531, 50)
(9544, 50)


In [4]:
# Map each entry of `genes` to its position in that sequence
# (if a name were repeated, the last position would win).
gene2idx = dict(zip(genes, range(len(genes))))

# Two lookups from the project's load_sid2ca helper
# (presumably sample id -> category plus the category list -- confirm in load_data/util).
sid2ca, ca_cats = load_sid2ca(ref_path)

len: 10245


### SOM: self-organizing map embeddings (genes and samples)

In [5]:
# Gene-level SOM with PCA initialisation: rawlog2 is passed in as loaded
# and the map is a single row of 50 nodes.
som_g_pca = sompy.SOMFactory.build(
    rawlog2,
    mapsize=[1, 50],
    initialization='pca',
)
som_g_pca.train(n_job=16, verbose='info')

# Transpose the trained codebook so that rows line up with `genes`
# (the printed shape confirms one row per gene and 50 columns).
som_g_wgts_pca = pd.DataFrame(
    data=np.transpose(som_g_pca.codebook.matrix),
    index=genes,
)
print(som_g_wgts_pca.shape)

# Persist the gene weights next to the other embedding outputs.
som_g_wgts_pca.to_csv(path + 'som_pca_gwgts.csv')

 Training...
 pca_linear_initialization took: 6.996000 seconds
 Rough training...
 radius_ini: 4.000000 , radius_final: 1.000000, trainlen: 2

 epoch: 1 ---> elapsed time:  6.442000, quantization error: 135.144318

 epoch: 2 ---> elapsed time:  6.152000, quantization error: 134.785753

 Finetune training...
 radius_ini: 1.000000 , radius_final: 1.000000, trainlen: 3

 epoch: 1 ---> elapsed time:  5.320000, quantization error: 127.900612

 epoch: 2 ---> elapsed time:  5.571000, quantization error: 116.401711

 epoch: 3 ---> elapsed time:  5.668000, quantization error: 113.340173

 Final quantization error: 113.340173
 train took: 36.959000 seconds


(20531, 50)


In [6]:
# Gene-level SOM with random codebook initialisation (1 x 50 map).
#
# Seed NumPy's global RNG first so the random starting codebook -- and thus
# the trained weights written to CSV below -- is the same on every
# Restart & Run All. The value 0 is arbitrary; any fixed integer works.
# NOTE(review): sompy's 'random' initialisation is understood to draw from
# NumPy's global RNG (np.random.rand) -- confirm against the installed version.
np.random.seed(0)

som_g_rand = sompy.SOMFactory.build(rawlog2, mapsize=[1, 50], initialization='random')
som_g_rand.train(n_job=16, verbose='info')

# Transpose the trained codebook so that rows line up with `genes`
# (the printed shape confirms one row per gene and 50 columns).
som_g_wgts_rand = pd.DataFrame(som_g_rand.codebook.matrix.T, index=genes)
print(som_g_wgts_rand.shape)

# Persist the gene weights next to the other embedding outputs.
som_g_wgts_rand.to_csv(path + 'som_rand_gwgts.csv')

 Training...
 random_initialization took: 0.517000 seconds
 Rough training...
 radius_ini: 9.000000 , radius_final: 1.500000, trainlen: 2

 epoch: 1 ---> elapsed time:  5.514000, quantization error: 621.758059

 epoch: 2 ---> elapsed time:  5.552000, quantization error: 138.291797

 Finetune training...
 radius_ini: 2.083333 , radius_final: 1.000000, trainlen: 3

 epoch: 1 ---> elapsed time:  5.040000, quantization error: 132.991429

 epoch: 2 ---> elapsed time:  5.482000, quantization error: 125.213353

 epoch: 3 ---> elapsed time:  5.472000, quantization error: 118.466952

 Final quantization error: 118.466952
 train took: 28.119000 seconds


(20531, 50)


In [7]:
# Sample-level SOM with PCA initialisation: the expression matrix is
# transposed before being handed to sompy; the map is a single row of 50 nodes.
som_s_pca = sompy.SOMFactory.build(
    rawlog2.T,
    mapsize=[1, 50],
    initialization='pca',
)
som_s_pca.train(n_job=16, verbose='info')

# Transpose the trained codebook so that rows line up with `sids_CN`
# (the printed shape confirms one row per sample id and 50 columns).
som_s_wgts_pca = pd.DataFrame(
    data=np.transpose(som_s_pca.codebook.matrix),
    index=sids_CN,
)
print(som_s_wgts_pca.shape)

# Persist the sample weights next to the other embedding outputs.
som_s_wgts_pca.to_csv(path + 'som_pca_swgts.csv')

 Training...
 pca_linear_initialization took: 6.769000 seconds
 Rough training...
 radius_ini: 4.000000 , radius_final: 1.000000, trainlen: 1

 epoch: 1 ---> elapsed time:  1.003000, quantization error: 95.013798

 Finetune training...
 radius_ini: 1.000000 , radius_final: 1.000000, trainlen: 1

 epoch: 1 ---> elapsed time:  0.629000, quantization error: 55.887064

 Final quantization error: 55.887064
 train took: 9.109000 seconds


(9544, 50)


In [8]:
# Sample-level SOM with random codebook initialisation (1 x 50 map),
# trained on the transposed expression matrix.
#
# Seed NumPy's global RNG first so the random starting codebook -- and thus
# the trained weights written to CSV below -- is the same on every
# Restart & Run All. The value 0 is arbitrary; any fixed integer works.
# NOTE(review): sompy's 'random' initialisation is understood to draw from
# NumPy's global RNG (np.random.rand) -- confirm against the installed version.
np.random.seed(0)

som_s_rand = sompy.SOMFactory.build(rawlog2.T, mapsize=[1, 50], initialization='random')
som_s_rand.train(n_job=16, verbose='info')

# Transpose the trained codebook so that rows line up with `sids_CN`
# (the printed shape confirms one row per sample id and 50 columns).
som_s_wgts_rand = pd.DataFrame(som_s_rand.codebook.matrix.T, index=sids_CN)
print(som_s_wgts_rand.shape)

# Persist the sample weights next to the other embedding outputs.
som_s_wgts_rand.to_csv(path + 'som_rand_swgts.csv')

 Training...
 random_initialization took: 0.494000 seconds
 Rough training...
 radius_ini: 9.000000 , radius_final: 1.500000, trainlen: 1

 epoch: 1 ---> elapsed time:  1.433000, quantization error: 174.613934

 Finetune training...
 radius_ini: 2.083333 , radius_final: 1.000000, trainlen: 2

 epoch: 1 ---> elapsed time:  0.652000, quantization error: 57.315367

 epoch: 2 ---> elapsed time:  0.631000, quantization error: 37.174060

 Final quantization error: 37.174060
 train took: 3.642000 seconds


(9544, 50)
