# Building ML-models with feature selection by filter combinations (strict CoV filter versions)

Preprocessing and feature selection are performed after splitting the data, so only the samples in the training data are available for these steps.

In [1]:
#import libraries
from functools import partial #to pass parameters to function inside another function

import numpy as np
import pandas as pd
from IPython.display import display
from sklearn import linear_model
from sklearn import svm
from sklearn.base import clone
from sklearn.feature_selection import SelectKBest, mutual_info_regression, f_regression
from sklearn.model_selection import LeaveOneOut
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

In [2]:
#import self-made functions
# NOTE(review): this is an absolute, user-specific path, so the cell only runs on a
# machine with exactly this directory layout; consider a path relative to the notebook.
# The names used below without being defined or imported in this notebook
# (load_gerosa, clean_gexp_g, CoVSelector, compare_models, get_models_preds)
# presumably come from this file -- confirm.
%run /Users/mariekececilia/Documents/master_thesis_code/methods.ipynb

In [3]:
#set a random seed to make reproducible results (used in mutual information)
# get_pipelines reads this global and passes it as random_state to mutual_info_regression
seed = 0

## Load and clean data

In [4]:
# read the transcriptional and fluxomic data
gexp_raw, flux = load_gerosa()

# clean the transcriptional data (remove duplicates)
gexp, groups = clean_gexp_g(gexp_raw)

# the cleaned expression data are the features
X = gexp

# one entry per target reaction: label -> its fluxes taken from `flux`
target_labels = ['R_PPC', 'R_AKGDH', 'R_ICDHyr']
targets = {label: flux[label] for label in target_labels}

## Create models/pipelines

Choose selection parameters:

In [5]:
# each value is passed as `p` to CoVSelector when the pipelines are built
cov_filters = [0.5, 0.9]
# each value is passed as `k` to SelectKBest (number of top-scoring features to keep)
k_numbers = [10,20]

Choose learning algorithms:

In [6]:
# learning algorithms to compare, keyed by the label used for them in the results
algorithms = {
    'lr': linear_model.LinearRegression(),
    'lasso_a0.05': linear_model.Lasso(alpha=0.05),
    'ridge_a0.05': linear_model.Ridge(alpha=0.05),
}

Make combinations:

In [7]:
#make combinations of learning algorithms and filters 
def get_pipelines(algorithm, cov_filters, k_numbers):
    """Build one pipeline per feature-selection combination for one learner.

    For every CoV threshold the result contains a CoV-only pipeline and, for
    every k, one pipeline that additionally keeps the k best features by
    mutual information and one that keeps the k best by F-statistic. Every
    pipeline standardizes the selected features before the learner.

    Parameters
    ----------
    algorithm : estimator
        Learner used as the final step ('m'). Each pipeline receives its own
        clone, so the pipelines never share a (fitted) estimator and the
        object passed in is left untouched.
    cov_filters : iterable of float
        Values passed as `p` to CoVSelector.
    k_numbers : iterable of int
        Values passed as `k` to SelectKBest.

    Returns
    -------
    dict
        Pipelines keyed by 'v<p*100>', 'v<p*100>_mi<k>' and 'v<p*100>_f<k>',
        inserted in that order for each threshold.

    Notes
    -----
    The mutual-information scorer is seeded with the notebook-level `seed`.
    """

    def build(cov_filter, selection=None):
        # Assemble filter -> [selection] -> standardize -> learner.
        steps = [('filter', CoVSelector(p = cov_filter))]
        if selection is not None:
            steps.append(('selection', selection))
        steps.append(('standardize', StandardScaler()))
        # A separate copy per pipeline: sharing one instance would let a fit
        # or set_params on one pipeline leak into all the others.
        # safe=False deep-copies objects that are not sklearn estimators
        # instead of rejecting them.
        steps.append(('m', clone(algorithm, safe=False)))
        return Pipeline(steps=steps)

    models = dict()

    for cov_filter in cov_filters:

        # filter by CoV only
        models['v%.0f' % (cov_filter*100)] = build(cov_filter)

        # filter by CoV and SelectKBest
        for k in k_numbers:

            #selection by mi (seeded so the ranking is reproducible)
            mi_scorer = partial(mutual_info_regression, random_state=seed)
            models['v%.0f_mi%.0f' % (cov_filter*100, k)] = build(
                cov_filter, SelectKBest(score_func = mi_scorer, k = k))

            #selection by f
            models['v%.0f_f%.0f' % (cov_filter*100, k)] = build(
                cov_filter, SelectKBest(score_func = f_regression, k = k))

    return models

In [8]:
# learning algorithm label -> all selection-combination pipelines for that algorithm
models = {
    name: get_pipelines(algorithm, cov_filters = cov_filters, k_numbers = k_numbers)
    for name, algorithm in algorithms.items()
}

## Learning

In [9]:
# Evaluate every set of pipelines on every target. Both results are nested dicts:
# target -> learning algorithm -> scores/preds of all selection combinations
scores, preds = {}, {}
for y_name, y in targets.items():
    target_scores = scores.setdefault(y_name, {})
    target_preds = preds.setdefault(y_name, {})
    for model_name, pipelines in models.items():
        # scores first, then predictions, for one learning algorithm at a time
        target_scores[model_name] = compare_models(pipelines, X, y)
        target_preds[model_name] = get_models_preds(pipelines, X, y)

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

### Save data to use in other notebooks

Scores and predictions are visualized in the Notebook 'compare_models', which also merges the results with the results from all other models built.

In [10]:
# give the results notebook-specific names before persisting them
# (these are aliases of the same dict objects, not copies)
scores_filter_strict = scores
preds_filter_strict = preds
# %store persists the variables so another notebook can load them with `%store -r`
%store scores_filter_strict
%store preds_filter_strict

Stored 'scores_filter_strict' (dict)
Stored 'preds_filter_strict' (dict)


### Scores and predictions

In [11]:
def _show_result(target_name, algorithm_name, description, frame):
    """Print a header (target, learning algorithm, description) and render `frame`.

    Parameters
    ----------
    target_name : str
        Label of the target the results belong to.
    algorithm_name : str
        Label of the learning algorithm the results belong to.
    description : str
        Line describing what the frame contains.
    frame : pandas.DataFrame
        Table handed to IPython's `display`.
    """
    print()
    print(target_name + '\t(target)')
    print(algorithm_name + '\t(learning algorithm)')
    print(description)
    display(frame)


# Dedicated loop names: `y` and `algorithm` are already used for other objects
# earlier in the notebook and are no longer overwritten with strings here.
for target_name in targets.keys():
    for algorithm_name in algorithms.keys():
        # the stored score result is indexed positionally:
        # [0] individual cv-split scores, [1] per-combination summary
        split_scores = scores[target_name][algorithm_name][0]
        score_summary = scores[target_name][algorithm_name][1]
        predictions = preds[target_name][algorithm_name]

        # rename_axis returns labelled copies, so the frames kept in
        # `scores` / `preds` are not modified just by displaying them
        _show_result(
            target_name, algorithm_name,
            '–> summary of all cv split scores for each combination of selection methods:',
            score_summary.sort_values(by = 'average').rename_axis(
                index = 'Selection combo', columns = 'Score'),
        )

        _show_result(
            target_name, algorithm_name,
            '–> all individual cv-split scores for each combination of selection methods:',
            split_scores.rename_axis(columns = 'Selection combo'),
        )

        _show_result(
            target_name, algorithm_name,
            '–> all predictions for each combination of selection methods (+ actual values):',
            predictions.rename_axis(index = 'Test set', columns = 'Selection combo'),
        )


R_PPC	(target)
lr 	(learning algorithm)
–> summary of all cv split scores for each combination of selection methods:


Score,average,std
Selection combo,Unnamed: 1_level_1,Unnamed: 2_level_1
v50,0.277279,0.31083
v50_f20,0.410882,0.664849
v50_mi20,0.492968,0.826446
v90,0.547302,0.840779
v50_f10,0.598956,0.973113
v90_mi10,0.988003,1.533409
v90_f10,0.990921,1.598316
v90_mi20,1.003002,1.598467
v90_f20,1.076619,1.973893
v50_mi10,1.139668,1.804344



R_PPC	(target)
lr	(learning algorithm)
–> all individual cv-split scores for each combination of selection methods:


Selection combo,v50,v50_mi10,v50_f10,v50_mi20,v50_f20,v90,v90_mi10,v90_f10,v90_mi20,v90_f20
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Acetate,0.191031,2.745166,0.608406,0.182148,0.477095,0.132563,0.328653,0.12164,0.536065,0.748998
Fructose,0.299097,0.19731,0.301007,0.377689,0.398427,0.334158,0.134304,0.038025,0.281687,0.039867
Galactose,1.062181,5.336438,3.136492,2.654502,2.115252,2.733386,5.019886,5.115509,5.205812,6.2666
Glucose,0.122427,0.108536,0.112142,0.042349,0.028318,0.103383,0.582737,0.129579,0.402267,0.25071
Glycerol,0.060724,0.603862,0.106875,0.378383,0.068784,0.584657,0.2761,1.184401,0.403956,0.375438
Gluconate,0.304691,0.008043,0.283155,0.150957,0.001585,0.272837,0.497107,0.447443,0.421643,0.545294
Pyruvate,0.145804,0.051801,0.20363,0.024804,0.075564,0.095289,0.353255,0.231967,0.05428,0.061919
Succinate,0.032279,0.066188,0.039944,0.132914,0.122032,0.122144,0.711983,0.658802,0.718307,0.324125



R_PPC	(target)
lr	(learning algorithm)
–> all predictions for each combination of selection methods (+ actual values):


Selection combo,actual,v50,v50_mi10,v50_f10,v50_mi20,v50_f20,v90,v90_mi10,v90_f10,v90_mi20,v90_f20
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Acetate,1.774145,1.435229,6.644469,2.853546,1.450989,0.92771,1.538959,1.191067,1.989952,0.823087,0.445314
Fructose,3.545669,2.485169,2.846072,2.478397,2.206508,2.132978,2.360855,3.069473,3.680495,2.5469,3.687024
Galactose,0.377386,0.778239,2.391285,1.561055,1.379159,1.175653,1.408929,2.271823,2.307909,2.341988,2.742315
Glucose,2.453331,2.152977,2.187057,2.178211,2.557227,2.383858,2.199698,1.023684,2.135431,1.466437,1.838257
Glycerol,1.376469,1.292883,2.207666,1.523578,1.8973,1.471148,2.18123,1.756512,-0.253822,1.932501,1.893247
Gluconate,1.94335,1.35123,1.927721,1.39308,1.649989,1.940269,2.473567,2.909403,2.812888,2.76275,3.003047
Pyruvate,2.489449,2.852421,2.618405,2.996374,2.4277,2.67756,2.25223,1.610039,1.91198,2.354322,2.643594
Succinate,2.01611,1.951031,1.882669,1.935579,1.748142,1.77008,2.262366,0.580675,0.687893,0.567924,1.362639



R_PPC	(target)
lasso_a0.05 	(learning algorithm)
–> summary of all cv split scores for each combination of selection methods:


Score,average,std
Selection combo,Unnamed: 1_level_1,Unnamed: 2_level_1
v50_f20,0.345134,0.492407
v50,0.410549,0.54656
v50_mi20,0.467049,0.751104
v50_mi10,0.516952,0.791664
v50_f10,0.595663,1.082448
v90_mi10,0.73817,1.331292
v90_f20,0.746015,1.330333
v90,0.748443,1.377073
v90_f10,0.765923,1.441126
v90_mi20,0.76917,1.349602



R_PPC	(target)
lasso_a0.05	(learning algorithm)
–> all individual cv-split scores for each combination of selection methods:


Selection combo,v50,v50_mi10,v50_f10,v50_mi20,v50_f20,v90,v90_mi10,v90_f10,v90_mi20,v90_f20
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Acetate,0.369865,0.603722,0.34318,0.076044,0.076121,0.11811,0.155732,0.115376,0.276686,0.275818
Fructose,0.347485,0.367076,0.342844,0.370475,0.374954,0.406303,0.402656,0.416896,0.417442,0.409253
Galactose,1.830487,2.544375,3.443115,2.432967,1.623484,4.383165,4.250666,4.569474,4.330737,4.257522
Glucose,0.030386,0.030048,0.002315,0.056103,0.030386,0.350355,0.35084,0.185594,0.34578,0.185694
Glycerol,0.176305,0.381857,0.250807,0.350039,0.160019,0.165286,0.147166,0.207812,0.147153,0.207814
Gluconate,0.136676,0.025185,0.12224,0.16339,0.136666,0.192009,0.308208,0.332337,0.331017,0.332123
Pyruvate,0.228342,0.006395,0.060235,0.109667,0.16018,0.233909,0.117612,0.211547,0.128553,0.211548
Succinate,0.164847,0.176957,0.20057,0.17771,0.199257,0.138411,0.172477,0.088345,0.17599,0.08835



R_PPC	(target)
lasso_a0.05	(learning algorithm)
–> all predictions for each combination of selection methods (+ actual values):


Selection combo,actual,v50,v50_mi10,v50_f10,v50_mi20,v50_f20,v90,v90_mi10,v90_f10,v90_mi20,v90_f20
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Acetate,1.774145,2.43034,2.845235,2.382997,1.639232,1.639095,1.564601,1.497855,1.569452,1.283264,1.284805
Fructose,3.545669,2.313601,2.244138,2.330058,2.232087,2.216205,2.105052,2.117984,2.067495,2.065557,2.094594
Galactose,0.377386,1.068187,1.337598,1.676771,1.295555,0.990067,2.031532,1.981529,2.101843,2.011747,1.984117
Glucose,2.453331,2.527878,2.527049,2.45901,2.59097,2.527879,1.593794,1.592604,1.998008,1.605019,1.997763
Glycerol,1.376469,1.619146,1.902083,1.721697,1.858287,1.59673,1.603979,1.579038,1.662515,1.57902,1.662519
Gluconate,1.94335,2.20896,1.992294,1.705795,1.625827,2.208941,2.31649,2.542306,2.589197,2.586632,2.588782
Pyruvate,2.489449,3.057895,2.505369,2.639401,2.762459,2.888208,1.907145,2.196659,1.962813,2.169423,1.96281
Succinate,2.01611,1.68376,1.659346,1.611739,1.657826,1.614385,1.737057,1.668378,1.837996,1.661295,1.837988



R_PPC	(target)
ridge_a0.05 	(learning algorithm)
–> summary of all cv split scores for each combination of selection methods:


Score,average,std
Selection combo,Unnamed: 1_level_1,Unnamed: 2_level_1
v50,0.277312,0.310918
v50_f20,0.407472,0.666641
v50_mi20,0.491645,0.825997
v90,0.547392,0.841011
v50_f10,0.603619,1.06109
v90_f10,0.73151,1.353596
v90_f20,0.855344,1.451292
v90_mi10,0.867109,1.307321
v50_mi10,0.890291,1.53341
v90_mi20,0.978282,1.526055



R_PPC	(target)
ridge_a0.05	(learning algorithm)
–> all individual cv-split scores for each combination of selection methods:


Selection combo,v50,v50_mi10,v50_f10,v50_mi20,v50_f20,v90,v90_mi10,v90_f10,v90_mi20,v90_f20
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Acetate,0.191031,1.287383,0.471047,0.176812,0.436506,0.132549,0.329762,0.071018,0.500308,0.584781
Fructose,0.299112,0.230825,0.303615,0.376501,0.397002,0.334243,0.364213,0.275922,0.347551,0.207323
Galactose,1.062456,4.80525,3.391723,2.652445,2.123242,2.734097,4.305604,4.306346,4.995464,4.677235
Glucose,0.122432,0.098185,0.103707,0.042429,0.029268,0.103401,0.39852,0.188229,0.399106,0.214534
Glycerol,0.060717,0.577647,0.116567,0.375042,0.076421,0.584656,0.292437,0.200016,0.406177,0.368586
Gluconate,0.304682,0.002282,0.195041,0.149696,2e-05,0.272826,0.422423,0.377416,0.398159,0.429014
Pyruvate,0.145792,0.055905,0.198899,0.026723,0.075423,0.095273,0.141849,0.19246,0.09421,0.109164
Succinate,0.032273,0.064856,0.048352,0.133509,0.121895,0.122089,0.682068,0.240672,0.68528,0.252118



R_PPC	(target)
ridge_a0.05	(learning algorithm)
–> all predictions for each combination of selection methods (+ actual values):


Selection combo,actual,v50,v50_mi10,v50_f10,v50_mi20,v50_f20,v90,v90_mi10,v90_f10,v90_mi20,v90_f20
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Acetate,1.774145,1.435229,4.058151,2.609851,1.460455,0.99972,1.538985,1.1891,1.648149,0.886526,0.736658
Fructose,3.545669,2.485117,2.727241,2.469152,2.210719,2.138032,2.360554,2.25429,2.56734,2.313368,2.810571
Galactose,0.377386,0.778343,2.190822,1.657376,1.378383,1.178669,1.409197,2.002262,2.002542,2.262606,2.14251
Glucose,2.453331,2.152964,2.212451,2.198904,2.557424,2.381528,2.199654,1.47563,1.991543,1.474192,1.927009
Glycerol,1.376469,1.292893,2.171581,1.53692,1.892702,1.48166,2.181229,1.778999,1.651784,1.935558,1.883815
Gluconate,1.94335,1.351247,1.938917,1.564318,1.652439,1.943311,2.473548,2.764266,2.676802,2.717113,2.777075
Pyruvate,2.489449,2.852389,2.628621,2.984597,2.422922,2.677209,2.252272,2.136324,2.01033,2.254918,2.217691
Succinate,2.01611,1.951044,1.885353,1.918628,1.746941,1.770356,2.262255,0.640986,1.530889,0.634511,1.507812



R_AKGDH	(target)
lr 	(learning algorithm)
–> summary of all cv split scores for each combination of selection methods:


Score,average,std
Selection combo,Unnamed: 1_level_1,Unnamed: 2_level_1
v50,1.367532,1.963509
v50_f10,1.513817,2.347561
v50_f20,1.916774,3.160395
v50_mi20,2.526445,3.551662
v50_mi10,3.197771,5.787968
v90_f20,3.30635,4.937212
v90_mi10,3.655469,6.775
v90_mi20,4.122172,6.339842
v90,4.513633,7.316287
v90_f10,10.293233,18.492576



R_AKGDH	(target)
lr	(learning algorithm)
–> all individual cv-split scores for each combination of selection methods:


Selection combo,v50,v50_mi10,v50_f10,v50_mi20,v50_f20,v90,v90_mi10,v90_f10,v90_mi20,v90_f20
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Acetate,0.002956,0.108013,0.708641,0.550572,0.255984,0.229741,0.336117,0.357424,0.336159,0.101799
Fructose,0.2019,0.138033,0.417671,0.246851,0.43291,0.021636,1.583106,0.448462,0.533983,0.274075
Galactose,5.45715,18.149121,1.867055,9.602121,3.811565,17.061421,2.910037,26.806706,11.609501,12.314829
Glucose,0.389471,0.929003,0.816862,0.656864,0.599028,0.170339,2.334252,0.458793,1.515418,1.187841
Glycerol,0.063925,0.387801,0.178914,0.223766,0.033452,0.393524,0.025306,0.023557,0.308964,0.446016
Gluconate,3.924309,4.197499,7.57338,7.615086,9.692786,17.297871,21.384171,53.515637,17.876653,11.348198
Pyruvate,0.553616,0.413374,0.437451,0.463683,0.449983,0.802874,0.508638,0.561159,0.508896,0.510247
Succinate,0.346931,1.259326,0.11056,0.852619,0.058481,0.131661,0.162129,0.174128,0.287804,0.267793



R_AKGDH	(target)
lr	(learning algorithm)
–> all predictions for each combination of selection methods (+ actual values):


Selection combo,actual,v50,v50_mi10,v50_f10,v50_mi20,v50_f20,v90,v90_mi10,v90_f10,v90_mi20,v90_f20
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Acetate,4.267163,4.254548,4.728072,7.291048,1.917783,5.35949,3.28682,2.832898,5.792349,2.832718,3.832769
Fructose,3.874895,4.657236,4.409757,5.493324,4.831418,5.552376,3.958731,10.009264,5.612637,5.944023,4.936907
Galactose,0.236073,1.524356,4.520581,-0.204688,2.502869,-0.663733,4.263805,-0.450907,6.564399,2.976757,3.143265
Glucose,2.138073,1.305355,0.151797,0.391562,0.73365,0.857309,2.50227,-2.852728,1.157139,-1.102002,-0.401618
Glycerol,1.840459,1.722807,1.126727,1.511175,1.428627,1.778891,2.564724,1.793885,1.797102,2.409094,2.661332
Gluconate,0.182358,0.897989,0.947807,1.563427,1.571032,1.949918,3.336768,4.081939,9.941378,3.442314,2.251796
Pyruvate,7.415655,3.310231,4.350215,4.171668,3.977142,4.078733,1.461821,3.643769,3.254295,3.641861,3.631835
Succinate,2.351983,1.536006,-0.60993,2.091948,0.346637,2.214436,2.661648,1.970659,1.942436,1.675072,1.722139



R_AKGDH	(target)
lasso_a0.05 	(learning algorithm)
–> summary of all cv split scores for each combination of selection methods:


Score,average,std
Selection combo,Unnamed: 1_level_1,Unnamed: 2_level_1
v50_f10,1.662481,2.370311
v50,1.929565,2.907626
v50_mi20,1.942103,2.793813
v50_mi10,1.989683,2.819504
v50_f20,2.479633,4.356213
v90_mi10,3.438443,5.464577
v90_mi20,4.233419,6.707199
v90_f20,4.635371,7.541937
v90,4.790729,7.754607
v90_f10,6.936533,11.480042



R_AKGDH	(target)
lasso_a0.05	(learning algorithm)
–> all individual cv-split scores for each combination of selection methods:


Selection combo,v50,v50_mi10,v50_f10,v50_mi20,v50_f20,v90,v90_mi10,v90_f10,v90_mi20,v90_f20
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Acetate,0.127322,0.078818,0.141516,0.479595,0.190048,0.035213,0.050626,0.058448,0.123323,0.007995
Fructose,0.132664,0.267001,0.262726,0.257086,0.515445,0.097627,0.077076,0.086506,0.181236,0.024604
Galactose,5.019483,8.394887,4.539576,4.370141,3.828231,17.797119,10.137166,25.667547,16.60024,16.79898
Glucose,0.519227,0.865865,0.833994,0.775678,0.767226,0.930627,0.972502,0.963275,0.919562,0.921473
Glycerol,0.410303,0.462249,0.229349,0.357402,0.365437,0.097682,0.031773,0.039977,0.106073,0.043746
Gluconate,8.451877,4.818186,6.737634,8.495187,13.60536,18.620694,15.137786,27.913581,15.045602,18.535315
Pyruvate,0.482959,0.45751,0.455058,0.445558,0.450589,0.526976,0.539174,0.508959,0.560955,0.521309
Succinate,0.292687,0.572946,0.099998,0.35618,0.114728,0.219892,0.561439,0.253971,0.330364,0.229547



R_AKGDH	(target)
lasso_a0.05	(learning algorithm)
–> all predictions for each combination of selection methods (+ actual values):


Selection combo,actual,v50,v50_mi10,v50_f10,v50_mi20,v50_f20,v90,v90_mi10,v90_f10,v90_mi20,v90_f20
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Acetate,4.267163,3.723859,3.930834,3.663292,2.220654,3.456199,4.116904,4.48319,4.516572,3.740924,4.233045
Fructose,3.874895,4.388954,4.909496,4.892932,4.871077,5.872189,4.25319,4.173558,4.210097,4.577166,3.970232
Galactose,0.236073,1.421035,2.217875,1.307742,1.267743,-0.667668,4.437483,2.629179,6.295475,4.154933,4.20185
Glucose,2.138073,1.027929,0.286791,0.354933,0.479618,0.497689,0.148325,0.058794,0.078521,0.171982,0.167897
Glycerol,1.840459,1.085313,0.989708,1.418351,1.182676,1.167886,2.020238,1.898936,1.914035,2.035682,1.920971
Gluconate,0.182358,1.723628,1.060994,1.411022,1.731526,2.663408,3.577996,2.942859,5.272631,2.926048,3.562427
Pyruvate,7.415655,3.834201,4.022916,4.041101,4.111548,4.074239,3.507784,3.417324,3.641392,3.255807,3.549805
Succinate,2.351983,1.663588,1.004423,2.587176,1.514254,2.621821,1.834801,1.031488,1.754647,1.574973,1.812092



R_AKGDH	(target)
ridge_a0.05 	(learning algorithm)
–> summary of all cv split scores for each combination of selection methods:


Score,average,std
Selection combo,Unnamed: 1_level_1,Unnamed: 2_level_1
v50_f10,1.247067,2.098433
v50,1.367632,1.963701
v50_f20,1.842814,3.115803
v50_mi10,2.198928,3.274693
v50_mi20,2.481741,3.491061
v90_f20,3.31252,5.023326
v90_mi10,3.588337,5.718758
v90_mi20,3.775966,5.72324
v90,4.513736,7.316417
v90_f10,7.148843,12.227497



R_AKGDH	(target)
ridge_a0.05	(learning algorithm)
–> all individual cv-split scores for each combination of selection methods:


Selection combo,v50,v50_mi10,v50_f10,v50_mi20,v50_f20,v90,v90_mi10,v90_f10,v90_mi20,v90_f20
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Acetate,0.002982,0.090142,0.197016,0.52676,0.132804,0.229862,0.260412,0.195628,0.330881,0.136714
Fructose,0.20186,0.235348,0.402266,0.259344,0.42276,0.021451,0.283493,0.03837,0.465014,0.25988
Galactose,5.457649,10.010079,1.171144,9.358043,3.444981,17.061055,9.023634,21.586911,11.618264,12.321659
Glucose,0.389411,0.762117,0.775565,0.64828,0.591082,0.17049,1.496247,0.901414,1.233594,0.896465
Glycerol,0.063985,0.360405,0.164867,0.215194,0.033646,0.393622,0.022578,0.018629,0.301371,0.439539
Gluconate,3.924677,4.730195,6.727841,7.585367,9.602861,17.298885,16.745678,33.730819,15.454272,11.679849
Pyruvate,0.55361,0.440688,0.441241,0.464208,0.446306,0.802774,0.558048,0.542869,0.518031,0.513656
Succinate,0.346884,0.962446,0.096596,0.796733,0.068074,0.131747,0.316604,0.176107,0.286303,0.252398



R_AKGDH	(target)
ridge_a0.05	(learning algorithm)
–> all predictions for each combination of selection methods (+ actual values):


Selection combo,actual,v50,v50_mi10,v50_f10,v50_mi20,v50_f20,v90,v90_mi10,v90_f10,v90_mi20,v90_f20
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Acetate,4.267163,4.254436,3.882512,5.107864,2.019392,4.833858,3.286305,3.155941,5.10194,2.85524,3.683783
Fructose,3.874895,4.657083,4.786845,5.433634,4.879824,5.513044,3.958017,4.9734,4.023573,5.676773,4.881903
Galactose,0.236073,1.524473,2.599177,0.512547,2.445249,-0.577193,4.263719,2.366304,5.332149,2.978825,3.144877
Glucose,2.138073,1.305483,0.508612,0.479858,0.752003,0.874297,2.502594,-1.061011,0.210784,-0.49944,0.221366
Glycerol,1.840459,1.722697,1.177148,1.537028,1.444403,1.778535,2.564904,1.798906,1.806174,2.395119,2.649412
Gluconate,0.182358,0.898056,1.044949,1.409236,1.565613,1.93352,3.336953,3.236071,6.333452,3.000573,2.312275
Pyruvate,7.415655,3.310276,4.147662,4.143562,3.973251,4.106002,1.462557,3.277362,3.389928,3.574119,3.606561
Succinate,2.351983,1.536117,0.088327,2.12479,0.47808,2.191874,2.661849,1.607335,1.937783,1.678602,1.758348



R_ICDHyr	(target)
lr 	(learning algorithm)
–> summary of all cv split scores for each combination of selection methods:


Score,average,std
Selection combo,Unnamed: 1_level_1,Unnamed: 2_level_1
v50_f20,0.392422,0.408706
v50_f10,0.554561,0.695102
v50,0.598459,0.967175
v50_mi20,0.609703,0.747933
v50_mi10,0.775998,1.040989
v90_mi20,1.252309,1.638832
v90_f20,1.381753,2.089062
v90,1.550353,2.704056
v90_mi10,1.592669,2.182991
v90_f10,2.98239,4.758921



R_ICDHyr	(target)
lr	(learning algorithm)
–> all individual cv-split scores for each combination of selection methods:


Selection combo,v50,v50_mi10,v50_f10,v50_mi20,v50_f20,v90,v90_mi10,v90_f10,v90_mi20,v90_f20
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Acetate,0.046585,0.056503,0.591469,0.05805,0.121321,0.219945,0.13479,0.418125,0.204479,0.028731
Fructose,0.173583,0.337569,0.103506,0.156209,0.237383,0.021568,0.28971,0.243937,0.150784,0.197791
Galactose,3.137121,3.407459,0.177979,2.225085,0.350109,8.398647,6.686676,12.98553,5.128879,6.564862
Glucose,0.272525,0.449492,0.556484,0.332858,0.379841,0.083841,1.762141,0.437353,0.993878,0.967324
Glycerol,0.150023,0.3962,0.174228,0.205399,0.043046,0.320095,0.045124,0.024436,0.27554,0.104459
Gluconate,0.247722,1.098809,2.326607,1.465078,1.423683,2.550887,3.190025,9.110517,2.501699,2.399788
Pyruvate,0.497921,0.437878,0.414599,0.412837,0.41672,0.716326,0.56027,0.519454,0.538117,0.521022
Succinate,0.262187,0.02407,0.091616,0.022108,0.167276,0.091516,0.072615,0.119771,0.225095,0.270047



R_ICDHyr	(target)
lr	(learning algorithm)
–> all predictions for each combination of selection methods (+ actual values):


Selection combo,actual,v50,v50_mi10,v50_f10,v50_mi20,v50_f20,v90,v90_mi10,v90_f10,v90_mi20,v90_f20
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Acetate,4.696417,4.477634,4.431056,7.474202,4.423792,4.126645,3.663464,5.329449,6.660107,3.7361,4.561485
Fructose,4.566119,5.358722,6.107498,5.038739,5.279386,5.650038,4.664601,3.24327,5.679963,5.254618,5.469258
Galactose,0.496177,2.052746,2.186882,0.584486,1.600214,0.322461,4.663396,3.813955,6.939304,3.041011,3.753514
Glucose,2.977971,2.166399,1.639397,1.320777,1.986729,1.846817,3.227648,-2.269633,1.675547,0.018231,0.097308
Glycerol,2.464836,2.095055,3.441405,2.035393,1.958562,2.358736,3.253817,2.353614,2.404605,3.143997,2.722312
Gluconate,1.154034,1.439913,2.422097,3.839016,2.844783,2.797012,4.097843,4.835429,11.667876,4.041078,3.92347
Pyruvate,7.97932,4.006246,4.485352,4.671099,4.685163,4.654178,2.263526,3.508749,3.834433,3.685514,3.821915
Succinate,3.038949,2.242176,2.965803,3.317367,3.106135,3.547293,3.317062,2.818277,2.674971,2.354898,2.218291



R_ICDHyr	(target)
lasso_a0.05 	(learning algorithm)
–> summary of all cv split scores for each combination of selection methods:


Score,average,std
Selection combo,Unnamed: 1_level_1,Unnamed: 2_level_1
v50_f20,0.475671,0.432837
v50_mi20,0.64036,0.746816
v50_mi10,0.69641,0.854795
v50_f10,0.773888,0.940628
v50,0.814617,1.06962
v90_mi10,1.239013,1.803491
v90_mi20,1.439338,2.497558
v90_f20,1.46939,2.538612
v90,1.683836,2.89039
v90_f10,2.235313,3.956661



R_ICDHyr	(target)
lasso_a0.05	(learning algorithm)
–> all individual cv-split scores for each combination of selection methods:


Selection combo,v50,v50_mi10,v50_f10,v50_mi20,v50_f20,v90,v90_mi10,v90_f10,v90_mi20,v90_f20
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Acetate,0.136425,0.104997,0.112976,0.103643,0.141068,0.008229,0.078347,0.110263,0.03025,0.030795
Fructose,0.189268,0.338584,0.175994,0.160443,0.201447,0.018669,0.231753,0.085766,0.140864,0.001314
Galactose,3.464308,2.832164,2.81754,2.391216,1.516121,8.947867,5.671074,12.057815,7.850027,7.912859
Glucose,0.489574,0.390597,0.495197,0.518741,0.487929,0.862308,0.787584,0.607866,0.757501,0.757504
Glycerol,0.30395,0.40008,0.234035,0.331341,0.274733,0.060604,0.025134,0.013217,0.048678,0.053262
Gluconate,1.370389,1.038752,1.845258,1.196725,0.670345,2.904767,2.254583,4.380297,1.992172,2.31643
Pyruvate,0.417721,0.430812,0.416497,0.420636,0.422561,0.482982,0.476258,0.472487,0.498698,0.481581
Succinate,0.145301,0.035295,0.093608,0.000134,0.091165,0.185262,0.387371,0.15479,0.196512,0.201372



R_ICDHyr	(target)
lasso_a0.05	(learning algorithm)
–> all predictions for each combination of selection methods (+ actual values):


Selection combo,actual,v50,v50_mi10,v50_f10,v50_mi20,v50_f20,v90,v90_mi10,v90_f10,v90_mi20,v90_f20
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Acetate,4.696417,4.055707,4.203309,4.165835,4.209668,4.033902,4.735065,5.064365,5.21426,4.838483,4.841046
Fructose,4.566119,5.430338,6.112135,5.369726,5.298721,5.48595,4.651362,5.624332,4.957737,5.209321,4.56012
Galactose,0.496177,2.215089,1.901433,1.894177,1.682645,1.248442,4.935907,3.310036,6.478993,4.391183,4.42236
Glucose,2.977971,1.520033,1.814786,1.503288,1.433176,1.524931,0.410042,0.63257,1.167763,0.722157,0.722147
Glycerol,2.464836,1.71565,3.450968,1.887979,1.648134,1.787665,2.614215,2.526788,2.497415,2.58482,2.596118
Gluconate,1.154034,2.735508,2.352788,3.283523,2.535095,1.927634,4.506233,3.755898,6.209044,3.453067,3.827272
Pyruvate,7.97932,4.64619,4.54173,4.655955,4.622927,4.607572,4.125455,4.179105,4.209196,4.000052,4.13663
Succinate,3.038949,2.597387,3.146211,3.323419,3.038541,3.315995,2.475948,1.861747,2.568549,2.441759,2.42699



R_ICDHyr	(target)
ridge_a0.05 	(learning algorithm)
–> summary of all cv split scores for each combination of selection methods:


Score,average,std
Selection combo,Unnamed: 1_level_1,Unnamed: 2_level_1
v50_f20,0.342765,0.380505
v50_f10,0.57482,0.61517
v50,0.598491,0.967256
v50_mi20,0.606897,0.747463
v50_mi10,0.721395,0.97015
v90_mi20,1.216324,1.667284
v90_f20,1.321136,2.073741
v90_mi10,1.37169,2.011273
v90,1.550364,2.704025
v90_f10,2.238031,3.575186



R_ICDHyr	(target)
ridge_a0.05	(learning algorithm)
–> all individual cv-split scores for each combination of selection methods:


Selection combo,v50,v50_mi10,v50_f10,v50_mi20,v50_f20,v90,v90_mi10,v90_f10,v90_mi20,v90_f20
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Acetate,0.046604,0.097471,0.121313,0.065226,0.134407,0.22004,0.133567,0.236014,0.191478,0.063714
Fructose,0.173551,0.343034,0.159781,0.158046,0.236238,0.021411,0.309193,0.129298,0.163226,0.177679
Galactose,3.137373,3.185062,1.392346,2.227515,0.108853,8.398552,6.337878,10.393176,5.300279,6.571741
Glucose,0.272488,0.425904,0.523707,0.332642,0.365453,0.083938,1.067457,0.750782,0.93598,0.894705
Glycerol,0.150058,0.260317,0.12403,0.192959,0.039843,0.320162,0.038552,0.007655,0.260155,0.102851
Gluconate,0.247785,0.995203,1.796829,1.451545,1.297891,2.550985,2.385915,5.781049,2.163524,2.003854
Pyruvate,0.497917,0.424048,0.410905,0.412379,0.414451,0.716239,0.512716,0.486616,0.494167,0.503444
Succinate,0.262154,0.04012,0.069647,0.014867,0.144983,0.091581,0.188237,0.119655,0.221782,0.251105



R_ICDHyr	(target)
ridge_a0.05	(learning algorithm)
–> all predictions for each combination of selection methods (+ actual values):


Selection combo,actual,v50,v50_mi10,v50_f10,v50_mi20,v50_f20,v90,v90_mi10,v90_f10,v90_mi20,v90_f20
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Acetate,4.696417,4.477543,4.23865,5.266156,4.390087,4.065187,3.663017,4.069129,5.804837,3.797157,4.397192
Fructose,4.566119,5.358571,6.132454,5.295696,5.287775,5.644811,4.663883,5.977933,5.156508,5.311428,5.377422
Galactose,0.496177,2.052871,2.076533,1.187028,1.60142,0.550188,4.663349,3.64089,5.653037,3.126056,3.756927
Glucose,2.977971,2.166508,1.709643,1.418388,1.987372,1.889663,3.227937,-0.200884,0.742163,0.190648,0.313565
Glycerol,2.464836,2.094969,3.106475,2.159123,1.989225,2.36663,3.253984,2.369812,2.445967,3.106076,2.718347
Gluconate,1.154034,1.439986,2.302531,3.227634,2.829166,2.651843,4.097956,3.90746,7.825558,3.650814,3.466548
Pyruvate,7.97932,4.006284,4.595702,4.700575,4.68882,4.672286,2.264219,3.888192,4.096458,4.036206,3.962181
Succinate,3.038949,2.242277,2.917027,3.250603,3.084129,3.479544,3.31726,2.466906,2.675322,2.364964,2.275855
