In [1]:
%run ../../Utils/yp_utils.py

# Initial setup

In [2]:
# Identifiers of the paper processed in this notebook.
paper_pmid = 18420932  # used below to build the datasets-list file name
paper_name = 'hillenmeyer_giaever_2008'  # prefix of the exported *_value.txt / *_valuez.txt files

In [3]:
# Dataset id -> dataset name list for this paper (tab-separated, no header row).
datasets_list_path = 'extras/YeastPhenome_%s_datasets_list.txt' % paper_pmid
datasets = pd.read_csv(
    datasets_list_path,
    sep='\t',
    header=None,
    names=['dataset_id', 'name'],
)

In [4]:
# Index the dataset list by its id so it can be reindexed by dataset id later on.
datasets = datasets.set_index('dataset_id')

# Load & process the Hom data

In [43]:
# Read the hom table (tab-separated); the first row supplies the column names.
original_data = pd.read_csv('raw_data/hom.ratio_result_nm.pub', sep='\t')

In [44]:
# Report the size of the table exactly as it was read from disk.
n_rows, n_cols = original_data.shape
print('Original data dimensions: %d x %d' % (n_rows, n_cols))

Original data dimensions: 4769 x 419


In [45]:
# Preview the first rows of the hom table.
original_data.head()

Unnamed: 0,Orf,01_05_31_04:37 degrees C::::::5gen:hom_05_01:old scanner,01_06_08_04:25 degrees C::::::5gen:hom_05_01:old scanner,01_06_13_05:37 degrees C::::::20gen:hom_05_01:old scanner,01_06_13_07:37 degrees C::::::20gen:hom_05_01:old scanner,01_06_14_02:25 degrees C::::::15gen:hom_05_01:old scanner,01_06_14_05:YP glycerol::::::5gen:hom_05_01:old scanner,01_06_14_07:YP glycerol::::::15gen:hom_05_01:old scanner,01_06_19_04:37 degrees C::::::5gen:hom_05_01:old scanner,01_06_26_08:23 degrees C::::::5gen:hom_05_01:old scanner,...,05_03_25_03:pH7.5::::::-5gen:hom_09_02:new scanner,05_03_25_04:pH7.5:::FK506:1:ug/ml:-5gen:hom_09_02:new scanner,05_04_20_01:LiCl:100:mm::::-5gen:hom_09_02:new scanner,05_05_13_07:benzaldehyde:0.003:%::::-5gen:hom_09_02:new scanner,05_05_13_09:potassium disulfite:2500:um::::-5gen:hom_09_02:new scanner,05_05_13_10:curcumin:150:um::::-5gen:hom_09_02:new scanner,05_05_13_11:thimerosal:125:nm::::-5gen:hom_09_02:new scanner,05_05_13_12:bisphenol:125:um::::-5gen:hom_09_02:new scanner,05_08_09_05:myriocin:0.2:ug/ml::::-5gen:hom_09_02:new scanner,05_08_09_06:aureobasidin A:0.02:ug/ml::::-5gen:hom_09_02:new scanner
0,YAL002W:chr1_1,0.364085,0.217003,4.83093,0.72646,-0.050712,0.348772,0.794952,0.514168,-0.055234,...,-0.368424,-0.125664,-0.130201,0.028618,0.462203,-0.031867,-0.09801,0.005553,-1.11394,-0.212757
1,YAL004W:chr1_1,0.033485,-0.0121,0.195237,-0.145415,-0.271582,-0.206185,-0.145804,-0.093771,-0.160824,...,-0.104239,-0.144162,-0.136239,-0.044624,-0.152248,-0.312474,-0.300317,-0.139599,-0.218166,-0.315431
2,YAL005C:chr1_1,0.139571,-0.189006,0.23205,0.322059,0.200959,0.033194,0.316956,-0.192362,-0.014067,...,0.009421,-0.383229,0.11052,0.101179,0.061872,-0.107803,0.090859,0.205231,-0.776672,-0.386412
3,YAL007C:chr1_1,0.003243,-0.196491,0.017423,0.32107,0.096454,-0.013857,0.224222,-0.189666,-0.144568,...,0.1106,-0.012451,0.259923,-0.177298,-0.006372,-0.377835,0.113757,-0.015115,-0.224341,-0.239957
4,YAL008W:chr1_1,-0.141538,-0.124237,-0.179533,0.170207,-0.146,0.116082,0.11485,0.042651,-0.19823,...,0.14714,-0.033892,-0.08258,-0.047452,0.10194,-0.251088,0.069136,-0.131789,0.007551,-0.202616


In [46]:
# The 'Orf' labels look like 'YAL002W:chr1_1'; keep only the part before the first ':'.
original_data['orf'] = original_data['Orf'].map(lambda label: label.partition(':')[0])

In [47]:
# Normalize the ORF strings (original note: eliminate all white spaces & capitalize).
# NOTE(review): clean_orf is not defined in this notebook (presumably loaded by the
# %run of yp_utils.py in the first cell) — confirm its exact behavior there.
original_data['orf'] = clean_orf(original_data['orf'])

In [48]:
# Translate the identifiers to ORFs.
# NOTE(review): translate_sc is not defined in this notebook (presumably from
# yp_utils.py); what it does with names it cannot translate is not visible here.
original_data['orf'] = translate_sc(original_data['orf'], to='orf')

In [49]:
# Make sure everything translated ok: print the rows whose 'orf' is NOT flagged by
# looks_like_orf. An empty DataFrame in the output means there is nothing to fix.
# NOTE(review): this only prints; flagged rows are neither dropped nor rejected here.
t = looks_like_orf(original_data['orf'])
print(original_data.loc[~t,])

Empty DataFrame
Columns: [Orf, 01_05_31_04:37 degrees C::::::5gen:hom_05_01:old scanner, 01_06_08_04:25 degrees C::::::5gen:hom_05_01:old scanner, 01_06_13_05:37 degrees C::::::20gen:hom_05_01:old scanner, 01_06_13_07:37 degrees C::::::20gen:hom_05_01:old scanner, 01_06_14_02:25 degrees C::::::15gen:hom_05_01:old scanner, 01_06_14_05:YP glycerol::::::5gen:hom_05_01:old scanner, 01_06_14_07:YP glycerol::::::15gen:hom_05_01:old scanner, 01_06_19_04:37 degrees C::::::5gen:hom_05_01:old scanner, 01_06_26_08:23 degrees C::::::5gen:hom_05_01:old scanner, 01_06_26_10:YP glycerol::::::5gen:hom_05_01:old scanner, 01_06_28_04:YP glycerol::::::15gen:hom_05_01:old scanner, 01_06_29_03:23 degrees C::::::15gen:hom_05_01:old scanner, 01_07_17_03:23 degrees C::::::15gen:hom_05_01:old scanner, 01_07_24_08:37 degrees C::::::5gen:hom_05_01:old scanner, 01_07_24_09:37 degrees C::::::20gen:hom_05_01:old scanner, 01_07_27_18:pH8::::::20gen:hom_05_01:old scanner, 01_07_27_19:sorbitol:1.5e+06:um::::5gen:hom_0

In [50]:
# Use the cleaned ORF name as the row index.
original_data = original_data.set_index('orf')

In [51]:
# The raw 'Orf' label column is no longer needed once 'orf' is the index.
original_data = original_data.drop(columns=['Orf'])

In [52]:
# Force every column to a numeric dtype; entries that cannot be parsed become NaN.
# The conversion is applied column by column (apply's default axis) rather than row
# by row: the resulting values are the same, but each column is converted in a single
# pd.to_numeric call instead of building one Series per row.
original_data = original_data.apply(pd.to_numeric, errors='coerce')

In [53]:
# Rows sharing the same ORF are collapsed into one row holding their mean.
original_data = original_data.groupby(level=0).mean()

In [54]:
# Sanity check: table size after averaging rows with the same ORF.
original_data.shape

(4717, 418)

## Load dataset ids (Hom)

In [55]:
# Mapping table without a header row: column 0 holds the dataset id and column 1 the
# column name used in the hom data file (see the lookup on dt[1] further down).
dt = pd.read_csv('extras/datasets.txt', sep='\t', header=None)

In [56]:
# Make the id column numeric; anything that is not a number becomes NaN, which is
# later used to discard the corresponding data columns.
dt[0] = pd.to_numeric(dt[0], errors='coerce')
dt.head()

Unnamed: 0,0,1
0,779.0,"04_02_11_14:1,3-diallylurea:10000:um::::20gen:..."
1,778.0,"04_01_14_05:1,3-diallylurea:20000:um::::20gen:..."
2,780.0,"03_12_09_02:1,4-dimethylendothall:100:um::::20..."
3,781.0,"04_01_14_06:1,7-octadiene:0.25:%::::20gen:hom_..."
4,782.0,"04_02_11_13:1,8-nonadiene:0.49:%::::20gen:hom_..."


In [60]:
# Map every data column name to its dataset id.
# A name -> id lookup is built once (keeping the first row when a name is repeated,
# which matches taking the first match) instead of scanning dt for every column.
dt_id_by_name = dt.drop_duplicates(subset=[1]).set_index(1)[0]

# Fail with an explicit message if a column has no entry in the mapping table,
# rather than with a bare IndexError from indexing an empty selection.
dt_missing_names = original_data.columns.difference(dt_id_by_name.index)
if len(dt_missing_names) > 0:
    raise KeyError('Columns missing from extras/datasets.txt: %s' % list(dt_missing_names))

# Ids in the same order as original_data.columns (NaN where dt holds no numeric id).
dt_ids = dt_id_by_name.reindex(original_data.columns).values

In [61]:
# Show the entries with no numeric dataset id (one NaN per affected data column).
dt_ids[np.isnan(dt_ids)]

array([nan, nan, nan, nan, nan, nan, nan, nan, nan, nan])

In [62]:
# Relabel the data columns with their dataset ids (dt_ids is in column order).
original_data.columns = dt_ids

In [66]:
# Discard the columns whose dataset id is NaN.
has_dataset_id = original_data.columns.notna()
original_data = original_data.loc[:, has_dataset_id]

In [68]:
# With the NaN ids removed, the remaining column labels can be cast to integers.
original_data.columns = original_data.columns.values.astype(int)

In [69]:
# Sanity check: table size after dropping columns without a dataset id.
original_data.shape

(4717, 408)

In [97]:
# Several columns can carry the same dataset id: transpose so the ids become the row
# index, average the rows sharing an id, and transpose back.
original_data = original_data.T.groupby(level=0).mean().T

In [98]:
# Sanity check: table size after averaging columns with the same dataset id.
original_data.shape

(4717, 273)

# Load Het data

In [71]:
# Read the het table (tab-separated); the first row supplies the column names.
original_data2 = pd.read_csv('raw_data/het.ratio_result_nm.pub', sep='\t')

In [72]:
# Report the size of the het table exactly as it was read from disk.
n_rows2, n_cols2 = original_data2.shape
print('Original data dimensions: %d x %d' % (n_rows2, n_cols2))

Original data dimensions: 5984 x 727


In [73]:
# Preview the first rows of the het table.
original_data2.head()

Unnamed: 0,Orf,01_04_24_02:benomyl:6.9:um::::15gen:het_04_01_2:old scanner,01_04_24_03:nocodazole:6.9:um::::15gen:het_04_01_2:old scanner,01_04_24_04:benomyl:13.8:um::::15gen:het_04_01_2:old scanner,01_05_08_02:nocodazole:6.6:um::::15gen:het_04_01_2:old scanner,01_05_11_02:tunicamycin:0.6:um::::5gen:het_04_01_2:old scanner,01_05_11_03:tunicamycin:0.15:um::::5gen:het_04_01_2:old scanner,01_05_11_05:5-fluorouracil:19.2:um::::20gen:het_04_01_2:old scanner,01_05_15_02:nocodazole:6.6:um::::5gen:het_04_01_2:old scanner,01_05_15_05:tunicamycin:0.15:um::::15gen:het_04_01_2:old scanner,...,05_05_13_05:thimerosal:125:nm::::20gen:het_06_03:new scanner,05_05_13_06:bisphenol:8.75:um::::20gen:het_06_03:new scanner,05_05_13_13:motuporamine:7.5:um::::20gen:het_06_03:new scanner,05_05_19_01:PP1:3:um::::20gen:het_06_03:new scanner,05_05_19_02:PP2:3:um::::20gen:het_06_03:new scanner,05_06_29_11:motuporamine:15:um::::20gen:het_06_03:new scanner,05_08_09_01:motuporamine:15:um::::20gen:het_06_03:new scanner,05_08_09_03:myriocin:0.15:ug/ml::::20gen:het_06_03:new scanner,05_08_09_04:aureobasidin A:0.02:ug/ml::::20gen:het_06_03:new scanner,05_08_12_15:motuporamine:15:um::::20gen:het_06_03:new scanner
0,YAL001C:chr1_1,-0.131636,-0.212345,-0.042362,0.238914,0.050857,-0.229736,-0.134586,-0.005207,0.141291,...,-0.250066,0.456222,-0.140851,-0.101573,0.090297,-0.903738,-0.24231,-0.77602,-0.505625,-0.307232
1,YAL002W:chr1_1,0.037294,-0.630956,7.5e-05,-0.009538,-0.286773,-0.065064,0.409466,-0.038485,-0.967149,...,-0.225882,0.072349,-0.357965,0.242942,-0.420637,-1.16985,0.298605,-1.62669,-0.759521,-0.811517
2,YAL003W:chr1_1,-0.293846,-0.211969,-0.141436,0.028114,0.065929,-0.085837,-0.004104,0.205367,-0.005246,...,-0.236016,-0.08419,-0.410695,0.195081,0.048692,-0.366164,-0.151471,-0.593138,-0.245003,-0.164903
3,YAL004W:chr1_1,0.025424,-0.060256,0.078768,0.082459,0.052538,-0.041104,-0.106782,-0.126677,0.280637,...,0.185836,0.033696,-0.139939,-0.028618,0.049193,0.198638,-0.075491,-0.53434,-0.651918,-0.103441
4,YAL005C:chr1_1,-0.144465,-0.202685,-0.334742,0.05328,0.150092,-0.093609,-1.14488,0.260643,2.50213,...,1.05705,-0.040865,-0.228768,-0.177879,-0.155201,-0.747556,0.485199,-0.608983,-1.68206,2.25136


In [74]:
# The 'Orf' labels look like 'YAL001C:chr1_1'; keep only the part before the first ':'.
original_data2['orf'] = original_data2['Orf'].map(lambda label: label.partition(':')[0])

In [75]:
# Normalize the ORF strings (original note: eliminate all white spaces & capitalize).
# NOTE(review): clean_orf is not defined in this notebook (presumably loaded by the
# %run of yp_utils.py in the first cell) — confirm its exact behavior there.
original_data2['orf'] = clean_orf(original_data2['orf'])

In [76]:
# Translate the identifiers to ORFs.
# NOTE(review): translate_sc is not defined in this notebook (presumably from
# yp_utils.py); what it does with names it cannot translate is not visible here.
original_data2['orf'] = translate_sc(original_data2['orf'], to='orf')

In [77]:
# Make sure everything translated ok: print the rows whose 'orf' is NOT flagged by
# looks_like_orf. An empty DataFrame in the output means there is nothing to fix.
# NOTE(review): this only prints; flagged rows are neither dropped nor rejected here.
t = looks_like_orf(original_data2['orf'])
print(original_data2.loc[~t,])

Empty DataFrame
Columns: [Orf, 01_04_24_02:benomyl:6.9:um::::15gen:het_04_01_2:old scanner, 01_04_24_03:nocodazole:6.9:um::::15gen:het_04_01_2:old scanner, 01_04_24_04:benomyl:13.8:um::::15gen:het_04_01_2:old scanner, 01_05_08_02:nocodazole:6.6:um::::15gen:het_04_01_2:old scanner, 01_05_11_02:tunicamycin:0.6:um::::5gen:het_04_01_2:old scanner, 01_05_11_03:tunicamycin:0.15:um::::5gen:het_04_01_2:old scanner, 01_05_11_05:5-fluorouracil:19.2:um::::20gen:het_04_01_2:old scanner, 01_05_15_02:nocodazole:6.6:um::::5gen:het_04_01_2:old scanner, 01_05_15_05:tunicamycin:0.15:um::::15gen:het_04_01_2:old scanner, 01_05_15_06:tunicamycin:0.6:um::::15gen:het_04_01_2:old scanner, 01_05_15_07:benomyl:27.6:um::::20gen:het_04_01_2:old scanner, 01_05_16_02:benomyl:3.4:um::::15gen:het_04_01_2:old scanner, 01_05_16_04:benomyl:2.4:um::::5gen:het_04_01_2:old scanner, 01_05_16_05:ketoconazole:11.8:um::::5gen:het_04_01_2:old scanner, 01_05_22_06:hygromycin:0.4:um::::5gen:het_04_01_2:old scanner, 01_05_31_03:37

In [78]:
# Use the cleaned ORF name as the row index.
original_data2 = original_data2.set_index('orf')

In [79]:
# The raw 'Orf' label column is no longer needed once 'orf' is the index.
original_data2 = original_data2.drop(columns=['Orf'])

In [80]:
# Force every column to a numeric dtype; entries that cannot be parsed become NaN.
# The conversion is applied column by column (apply's default axis) rather than row
# by row: the resulting values are the same, but each column is converted in a single
# pd.to_numeric call instead of building one Series per row.
original_data2 = original_data2.apply(pd.to_numeric, errors='coerce')

In [81]:
# Rows sharing the same ORF are collapsed into one row holding their mean.
original_data2 = original_data2.groupby(level=0).mean()

In [82]:
# Sanity check: het table size after averaging rows with the same ORF.
original_data2.shape

(5887, 726)

## Load dataset ids (Het)

In [87]:
# Mapping table for the het data, read with a header row; the 'Name' and 'Dataset'
# columns are used below to map data column names to dataset ids.
dt2 = pd.read_csv('extras/datasets_het.txt', sep='\t')

In [88]:
# Size of the het mapping table (one row per mapped column name).
dt2.shape

(726, 3)

In [89]:
# Preview the het mapping table.
dt2.head()

Unnamed: 0,Name,Conditionset,Dataset
0,"04_02_11_07:1,3-diallylurea:5000:um::::20gen:h...",8406,12265
1,"04_01_21_05:1,3-diallylurea:10000:um::::20gen:...",752,12215
2,"03_12_09_06:1,4-dimethylendothall:100:um::::20...",753,12216
3,"03_12_09_08:1,4-dimethylendothall:500:um::::20...",8408,12267
4,"03_12_09_07:1,4-dimethylendothall:1000:um::::2...",8407,12266


In [90]:
# Make the dataset id column numeric; anything that is not a number becomes NaN.
dt2['Dataset'] = pd.to_numeric(dt2['Dataset'], errors='coerce')
dt2.head()

Unnamed: 0,Name,Conditionset,Dataset
0,"04_02_11_07:1,3-diallylurea:5000:um::::20gen:h...",8406,12265
1,"04_01_21_05:1,3-diallylurea:10000:um::::20gen:...",752,12215
2,"03_12_09_06:1,4-dimethylendothall:100:um::::20...",753,12216
3,"03_12_09_08:1,4-dimethylendothall:500:um::::20...",8408,12267
4,"03_12_09_07:1,4-dimethylendothall:1000:um::::2...",8407,12266


In [92]:
# Map every het data column name to its dataset id.
# A name -> id lookup is built once (keeping the first row when a name is repeated,
# which matches taking the first match) instead of scanning dt2 for every column.
dt2_id_by_name = dt2.drop_duplicates(subset=['Name']).set_index('Name')['Dataset']

# Fail with an explicit message if a column has no entry in the mapping table,
# rather than with a bare IndexError from indexing an empty selection.
dt2_missing_names = original_data2.columns.difference(dt2_id_by_name.index)
if len(dt2_missing_names) > 0:
    raise KeyError('Columns missing from extras/datasets_het.txt: %s' % list(dt2_missing_names))

# Ids in the same order as original_data2.columns.
dt_ids2 = dt2_id_by_name.reindex(original_data2.columns).values

In [94]:
# Relabel the het data columns with their dataset ids (dt_ids2 is in column order).
original_data2.columns = dt_ids2

In [95]:
# Keep only the columns with a positive dataset id. A NaN id also fails the '> 0'
# comparison, so such columns would be dropped as well.
# NOTE(review): presumably non-positive ids mark columns without a dataset — confirm
# against extras/datasets_het.txt.
original_data2 = original_data2.loc[:,original_data2.columns>0]

In [96]:
# Sanity check: het table size after dropping columns without a positive dataset id.
original_data2.shape

(5887, 721)

In [99]:
# Several columns can carry the same dataset id: transpose so the ids become the row
# index, average the rows sharing an id, and transpose back.
original_data2 = original_data2.T.groupby(level=0).mean().T

In [100]:
# Sanity check: het table size after averaging columns with the same dataset id.
original_data2.shape

(5887, 507)

# Merge

In [101]:
# Combine hom and het columns into one table aligned on ORF. The outer join keeps
# ORFs present in only one of the two tables, leaving NaN in the other table's columns.
original_data = original_data.join(original_data2, how='outer')

In [102]:
# Sanity check: size of the merged hom + het table.
original_data.shape

(5888, 780)

In [103]:
# Preview the merged table (rows: ORFs, columns: dataset ids).
original_data.head()

Unnamed: 0_level_0,778,779,780,781,782,783,784,785,786,787,...,12617,12618,12619,12620,12621,12622,12623,12624,12625,12626
orf,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
YAL001C,,,,,,,,,,,...,0.050857,-0.056812,0.296919,0.215062,-0.018238,0.031422,0.098524,-0.139622,-0.135583,-0.332011
YAL002W,-2.27125,-0.885406,-0.423706,-0.529279,-0.703219,-2.49846,0.143344,-0.055234,-0.758126,0.217003,...,-0.286773,-0.22144,0.472599,0.226105,0.583654,-0.261767,1.02164,-0.538731,0.47185,2.73675
YAL003W,,,,,,,,,,,...,0.065929,-0.135917,-0.086778,-0.051601,0.155652,-0.363792,0.00576,-0.160483,-0.064899,-0.097966
YAL004W,-0.424613,-0.151793,-0.061239,-0.206722,0.00474,-0.330591,-0.107357,-0.160824,-0.283584,-0.0121,...,0.052538,0.053578,-0.01372,-0.426523,0.350106,-0.169926,0.198097,-0.196637,0.27615,0.536735
YAL005C,-1.18578,-0.277463,-0.171724,-0.468994,-0.02795,-0.947262,-0.124123,-0.014067,0.249295,-0.189006,...,0.150092,-0.170443,0.813991,0.173497,3.03586,0.789079,0.00425,-0.420168,-1.719465,-2.567455


In [117]:
# Taking the opposite because the original values are log2(control/treatment);
# negating them gives log2(treatment/control).
original_data = -original_data

# Prepare the final dataset

In [118]:
# Work on a copy so original_data stays available unchanged.
data = original_data.copy()

In [119]:
# Align the dataset list with the data columns: one row per column, in column order.
# Ids that are absent from the list end up as rows filled with NaN.
dataset_ids = original_data.columns.values
datasets = datasets.reindex(index=dataset_ids)

In [120]:
# Give the columns a two-level label: (dataset_id, 'value').
idx = pd.MultiIndex.from_tuples(
    [(dataset_id, 'value') for dataset_id in datasets.index.values],
    names=['dataset_id', 'data_type'],
)
data.columns = idx

In [121]:
# Preview the table with its (dataset_id, data_type) column labels.
data.head()

dataset_id,778,779,780,781,782,783,784,785,786,787,...,12617,12618,12619,12620,12621,12622,12623,12624,12625,12626
data_type,value,value,value,value,value,value,value,value,value,value,...,value,value,value,value,value,value,value,value,value,value
orf,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
YAL001C,,,,,,,,,,,...,-0.050857,0.056812,-0.296919,-0.215062,0.018238,-0.031422,-0.098524,0.139622,0.135583,0.332011
YAL002W,2.27125,0.885406,0.423706,0.529279,0.703219,2.49846,-0.143344,0.055234,0.758126,-0.217003,...,0.286773,0.22144,-0.472599,-0.226105,-0.583654,0.261767,-1.02164,0.538731,-0.47185,-2.73675
YAL003W,,,,,,,,,,,...,-0.065929,0.135917,0.086778,0.051601,-0.155652,0.363792,-0.00576,0.160483,0.064899,0.097966
YAL004W,0.424613,0.151793,0.061239,0.206722,-0.00474,0.330591,0.107357,0.160824,0.283584,0.0121,...,-0.052538,-0.053578,0.01372,0.426523,-0.350106,0.169926,-0.198097,0.196637,-0.27615,-0.536735
YAL005C,1.18578,0.277463,0.171724,0.468994,0.02795,0.947262,0.124123,0.014067,-0.249295,0.189006,...,-0.150092,0.170443,-0.813991,-0.173497,-3.03586,-0.789079,-0.00425,0.420168,1.719465,2.567455


## Subset to the genes currently in SGD

In [122]:
# Gene table keyed by systematic name, keeping 'id' as a regular column.
genes = (
    pd.read_csv(path_to_genes, sep='\t', index_col='id')
    .reset_index()
    .set_index('systematic_name')
)

# Look up the gene id of every ORF in the data; ORFs not found in the table get NaN.
gene_ids = genes.reindex(index=data.index.values)['id'].values
num_missing = np.isnan(gene_ids).sum()
print('ORFs missing from SGD: %d' % num_missing)

ORFs missing from SGD: 24


In [123]:
# Attach the gene id of every ORF (NaN when the ORF was not found in the gene table).
data['gene_id'] = gene_ids

# Keep only the ORFs that have a gene id. The explicit .copy() makes the filtered
# table an independent frame, so the column assignment below does not trigger
# pandas' SettingWithCopyWarning.
data = data.loc[data['gene_id'].notnull()].copy()

# With the NaNs gone the ids can be stored as integers.
data['gene_id'] = data['gene_id'].astype(int)

# Index the rows by (gene_id, orf).
data = data.reset_index().set_index(['gene_id','orf'])

data.head()

Unnamed: 0_level_0,dataset_id,778,779,780,781,782,783,784,785,786,787,...,12617,12618,12619,12620,12621,12622,12623,12624,12625,12626
Unnamed: 0_level_1,data_type,value,value,value,value,value,value,value,value,value,value,...,value,value,value,value,value,value,value,value,value,value
gene_id,orf,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
1,YAL001C,,,,,,,,,,,...,-0.050857,0.056812,-0.296919,-0.215062,0.018238,-0.031422,-0.098524,0.139622,0.135583,0.332011
2,YAL002W,2.27125,0.885406,0.423706,0.529279,0.703219,2.49846,-0.143344,0.055234,0.758126,-0.217003,...,0.286773,0.22144,-0.472599,-0.226105,-0.583654,0.261767,-1.02164,0.538731,-0.47185,-2.73675
3,YAL003W,,,,,,,,,,,...,-0.065929,0.135917,0.086778,0.051601,-0.155652,0.363792,-0.00576,0.160483,0.064899,0.097966
1863,YAL004W,0.424613,0.151793,0.061239,0.206722,-0.00474,0.330591,0.107357,0.160824,0.283584,0.0121,...,-0.052538,-0.053578,0.01372,0.426523,-0.350106,0.169926,-0.198097,0.196637,-0.27615,-0.536735
4,YAL005C,1.18578,0.277463,0.171724,0.468994,0.02795,0.947262,0.124123,0.014067,-0.249295,0.189006,...,-0.150092,0.170443,-0.813991,-0.173497,-3.03586,-0.789079,-0.00425,0.420168,1.719465,2.567455


# Normalize

In [124]:
# Compute the normalized scores from the raw values.
# NOTE(review): normalize_phenotypic_scores is not defined in this notebook (presumably
# from yp_utils.py); the normalization method and the meaning of has_tested=True
# should be confirmed there.
data_norm = normalize_phenotypic_scores(data, has_tested=True)

In [125]:
# Assign proper column names: label the normalized columns (dataset_id, 'valuez').
idx = pd.MultiIndex.from_tuples(
    [(dataset_id, 'valuez') for dataset_id in datasets.index.values],
    names=['dataset_id', 'data_type'],
)
data_norm.columns = idx

In [126]:
# Wherever the raw value is missing, blank out the normalized value as well.
vals = data.values
vals_norm = np.where(np.isnan(vals), np.nan, data_norm.values)

# Rebuild the normalized table around the masked array, keeping its labels.
data_norm = pd.DataFrame(data=vals_norm, index=data_norm.index, columns=data_norm.columns)

In [127]:
# Put the raw ('value') and normalized ('valuez') columns side by side in one table.
data_all = data.join(data_norm)
data_all.head()

Unnamed: 0_level_0,dataset_id,778,779,780,781,782,783,784,785,786,787,...,12617,12618,12619,12620,12621,12622,12623,12624,12625,12626
Unnamed: 0_level_1,data_type,value,value,value,value,value,value,value,value,value,value,...,valuez,valuez,valuez,valuez,valuez,valuez,valuez,valuez,valuez,valuez
gene_id,orf,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
1,YAL001C,,,,,,,,,,,...,0.011372,0.298955,-0.688466,-0.607084,-0.132349,-0.084872,-0.355709,0.34299,0.222588,0.584895
2,YAL002W,2.27125,0.885406,0.423706,0.529279,0.703219,2.49846,-0.143344,0.055234,0.758126,-0.217003,...,1.298996,0.867487,-1.153891,-0.649131,-2.165734,0.896415,-3.371057,1.421395,-1.32992,-5.284732
3,YAL003W,,,,,,,,,,,...,-0.046107,0.57214,0.328054,0.408256,-0.719806,1.237887,-0.052696,0.399357,0.04193,0.137238
1863,YAL004W,0.424613,0.151793,0.061239,0.206722,-0.00474,0.330591,0.107357,0.160824,0.283584,0.0121,...,0.004963,-0.082267,0.134503,1.8358,-1.376734,0.589028,-0.680963,0.497046,-0.82974,-1.076756
4,YAL005C,1.18578,0.277463,0.171724,0.468994,0.02795,0.947262,0.124123,0.014067,-0.249295,0.189006,...,-0.367082,0.691373,-2.058333,-0.448822,-10.450075,-2.620708,-0.047764,1.101034,4.270754,4.860637


# Print out

In [128]:
# Write one tab-separated file per data type: <paper_name>_value.txt and
# <paper_name>_valuez.txt.
for data_type in ('value', 'valuez'):
    # Select the columns of this data type; the data_type level is dropped by xs.
    table = data_all.xs(data_type, level='data_type', axis=1).copy()
    # Replace the dataset ids by the dataset names (datasets is in column order).
    table.columns = datasets['name'].values
    # Rows are written keyed by ORF only.
    table = table.droplevel('gene_id', axis=0)
    table.to_csv('%s_%s.txt' % (paper_name, data_type), sep='\t')

# Save to DB

In [129]:
# from IO.save_data_to_db3 import *

In [130]:
# Database export is disabled. To enable it, uncomment this call together with the
# commented-out import of IO.save_data_to_db3 in the previous cell.
# save_data_to_db(data_all, paper_pmid)