# Building ML-models with feature selection by filter combinations

Preprocessing and feature selection are performed after splitting the data, so only the samples in the training data are available for these steps.

In [1]:
#import libraries
from functools import partial #to pass parameters to function inside another function

import numpy as np
import pandas as pd
from IPython.display import display
from sklearn import linear_model
from sklearn import svm
from sklearn.base import clone #to give every pipeline its own estimator instance
from sklearn.feature_selection import SelectKBest, mutual_info_regression, f_regression
from sklearn.model_selection import LeaveOneOut
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

In [2]:
#import self-made functions
# NOTE(review): the names used below that are not defined in this notebook (load_gerosa,
# clean_gexp_g, CoVSelector, compare_models, get_models_preds) are presumably defined by
# the notebook executed here -- confirm.
# NOTE(review): this is an absolute, user-specific path; on any other machine it has to be
# adjusted before the notebook can be run.
%run /Users/mariekececilia/Documents/master_thesis_code/methods.ipynb

In [3]:
#random seed for reproducible results; get_pipelines passes it as random_state
#to mutual_info_regression
seed = 0

## Load and clean data

In [4]:
#read the transcriptional (gene expression) and fluxomic data
gexp, flux = load_gerosa()

#remove duplicates from the transcriptional data
gexp, groups = clean_gexp_g(gexp)

#features: the cleaned transcriptional data
X = gexp

#targets: the fluxes of the selected reactions, keyed by reaction label
target_labels = ['R_PPC', 'R_AKGDH', 'R_ICDHyr']
targets = {label: flux[label] for label in target_labels}

## Create models/pipelines

Choose selection parameters:

In [5]:
#values handed to CoVSelector as its parameter p (one group of pipelines per value)
cov_filters = [0.1, 0.2]
#numbers of features kept by SelectKBest (its parameter k)
k_numbers = [10, 20]

Choose learning algorithms:

In [6]:
#learning algorithms to compare, keyed by the short label used in the result dicts
algorithms = {
    'lr': linear_model.LinearRegression(),
    'svr': svm.SVR(),
}

Make combinations:

In [7]:
#make combinations of learning algorithms and filters 
def get_pipelines(algorithm, cov_filters, k_numbers):
    """Build one pipeline per feature-selection combination for a learning algorithm.

    Every pipeline ends with a StandardScaler ('standardize') followed by the
    learning algorithm ('m'). Before those two steps it contains, depending on
    the combination:

    * 'noSelection'  -- nothing (baseline)
    * 'v<P>'         -- a CoVSelector ('filter') only
    * 'v<P>_mi<K>'   -- CoVSelector, then SelectKBest ('selection') scored by
                        mutual_info_regression (random_state fixed to the
                        notebook-level ``seed``)
    * 'v<P>_f<K>'    -- CoVSelector, then SelectKBest scored by f_regression

    where <P> is ``cov_filter*100`` and <K> is ``k``, both formatted without
    decimals. Two values of ``cov_filters`` that format to the same <P> would
    therefore overwrite each other's entries.

    Parameters
    ----------
    algorithm : estimator
        The learning algorithm used as the final step. It is not used directly:
        each pipeline receives its own copy.
    cov_filters : iterable of numbers
        Values passed as ``p`` to CoVSelector.
    k_numbers : iterable of int
        Values passed as ``k`` to SelectKBest.

    Returns
    -------
    dict
        Maps the combination name to its (unfitted) Pipeline, in the order
        baseline, then for each cov filter: filter only, and for each k the
        mutual-information and the F-test variants.
    """
    def build(*selection_steps):
        # Pipeline keeps references to the step objects it is given. Passing the
        # same estimator object to every pipeline would make them all share one
        # final step, so fitting one pipeline directly would replace the fitted
        # state seen by the others. Clone it instead (safe=False falls back to a
        # deep copy for objects that are not scikit-learn estimators).
        return Pipeline(steps=[
            *selection_steps,
            ('standardize', StandardScaler()),
            ('m', clone(algorithm, safe=False)),
        ])

    # the scoring functions available to SelectKBest, keyed by the tag used in
    # the pipeline name; mutual information is stochastic, so its seed is fixed
    score_funcs = [
        ('mi', partial(mutual_info_regression, random_state=seed)),
        ('f', f_regression),
    ]

    models = dict()

    #'baseline' – no selection
    models['noSelection'] = build()

    for cov_filter in cov_filters:
        cov_label = 'v%.0f' % (cov_filter*100)

        # filter by CoV only
        models[cov_label] = build(('filter', CoVSelector(p = cov_filter)))

        # filter by CoV and SelectKBest
        for k in k_numbers:
            for tag, score_func in score_funcs:
                models['%s_%s%.0f' % (cov_label, tag, k)] = build(
                    ('filter', CoVSelector(p = cov_filter)),
                    ('selection', SelectKBest(score_func = score_func, k = k)),
                )

    return models

In [8]:
#all pipelines, nested as: learning algorithm label -> selection combo -> pipeline
models = {
    name: get_pipelines(algorithm, cov_filters = cov_filters, k_numbers = k_numbers)
    for name, algorithm in algorithms.items()
}

## Learning

In [9]:
# evaluate every group of pipelines on every target; results are nested dicts:
# target -> learning algorithm -> scores/preds of all selection combinations
scores, preds = dict(), dict()
for y_name, y in targets.items():
    scores[y_name], preds[y_name] = dict(), dict()
    for model_name, pipelines in models.items():
        # scores first, then predictions, for the same pipelines and data
        scores[y_name][model_name] = compare_models(pipelines, X, y)
        preds[y_name][model_name] = get_models_preds(pipelines, X, y)

### Save data to use in other notebooks

Scores and predictions are visualized in the notebook 'compare_models', which also merges them with the results from all the other models that were built.

In [10]:
# give the results notebook-specific names; these are new names for the same
# dict objects, no copy is made
scores_filter_first = scores
preds_filter_first = preds
# %store persists the variables so that another notebook can restore them
# with `%store -r`
%store scores_filter_first
%store preds_filter_first

Stored 'scores_filter_first' (dict)
Stored 'preds_filter_first' (dict)


### Scores and predictions

In [11]:
def _print_header(target, algorithm, description):
    """Print the blank line and the three header lines that precede each table."""
    print()
    print(target + '\t(target)')
    # always concatenate: print(algorithm, '...') would insert an extra space
    # before the tab and make the headers inconsistent
    print(algorithm + '\t(learning algorithm)')
    print(description)


# For every target and learning algorithm show three tables: the score summary,
# the individual cv-split scores and the predictions.
# rename_axis returns a new frame, so the frames kept in `scores` and `preds`
# are not modified by displaying them.
for y in targets.keys():
    for algorithm in algorithms.keys():
        _print_header(y, algorithm,
                      '–> summary of all cv split scores for each combination of selection methods:')
        display(
            scores[y][algorithm][1]
            .sort_values(by = 'average')
            .rename_axis(index = 'Selection combo', columns = 'Score')
        )

        _print_header(y, algorithm,
                      '–> all individual cv-split scores for each combination of selection methods:')
        display(scores[y][algorithm][0].rename_axis(columns = 'Selection combo'))

        _print_header(y, algorithm,
                      '–> all predictions for each combination of selection methods (+ actual values):')
        display(
            preds[y][algorithm]
            .rename_axis(index = 'Test set', columns = 'Selection combo')
        )


R_PPC	(target)
lr 	(learning algorithm)
–> summary of all cv split scores for each combination of selection methods:


Score,average,std
Selection combo,Unnamed: 1_level_1,Unnamed: 2_level_1
v10_f10,0.165893,0.061271
noSelection,0.28809,0.356287
v10,0.291417,0.365301
v20,0.298487,0.365333
v10_f20,0.348834,0.578981
v20_mi20,0.434176,0.558834
v10_mi20,0.43955,0.517082
v20_f20,0.445513,0.771083
v20_f10,0.457395,0.96887
v10_mi10,0.543026,0.548143



R_PPC	(target)
lr	(learning algorithm)
–> all individual cv-split scores for each combination of selection methods:


Selection combo,noSelection,v10,v10_mi10,v10_f10,v10_mi20,v10_f20,v20,v20_mi10,v20_f10,v20_mi20,v20_f20
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Acetate,0.220192,0.213017,0.344201,0.158488,0.09354,0.028665,0.257289,0.310719,0.102096,0.006371,0.125843
Fructose,0.298812,0.302572,0.304443,0.185369,0.157177,0.238273,0.328021,0.288418,0.179079,0.159263,0.214565
Galactose,1.188443,1.218405,1.457043,0.11532,1.634755,1.85673,1.222058,3.351341,3.016245,1.757287,2.470376
Glucose,0.046707,0.049225,0.193992,0.128289,0.129831,0.15874,0.059936,0.043552,0.026079,0.089217,0.153046
Glycerol,0.203123,0.203523,0.253199,0.209116,0.796665,0.028455,0.211859,0.253199,0.024107,0.840416,0.077286
Gluconate,0.021594,0.025634,0.051264,0.191467,0.006901,0.06996,0.028152,0.109483,0.06217,0.065311,0.030893
Pyruvate,0.290049,0.282247,1.506541,0.062126,0.553569,0.337785,0.251303,0.437169,0.183477,0.37809,0.365106
Succinate,0.035796,0.036715,0.233522,0.276967,0.143964,0.072062,0.029278,0.208361,0.065907,0.17745,0.126989



R_PPC	(target)
lr	(learning algorithm)
–> all predictions for each combination of selection methods (+ actual values):


Selection combo,actual,noSelection,v10,v10_mi10,v10_f10,v10_mi20,v10_f20,v20,v20_mi10,v20_f10,v20_mi20,v20_f20
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Acetate,1.774145,1.383492,1.396222,1.163483,2.055326,1.608192,1.825002,1.317678,1.222885,1.955278,1.762843,1.550881
Fructose,3.545669,2.486181,2.472848,2.466216,2.888411,2.988372,2.700832,2.382614,2.523034,2.910715,2.980975,2.784891
Galactose,0.377386,0.825888,0.837196,0.927254,0.420906,0.99432,1.078091,0.838574,1.642136,1.515676,1.040562,1.309672
Glucose,2.453331,2.338743,2.332567,1.977405,2.138597,2.134813,2.063889,2.306287,2.346484,2.389351,2.234453,2.077858
Glycerol,1.376469,1.096876,1.096326,1.027949,1.088627,2.473053,1.415636,1.084851,1.027949,1.343287,2.533275,1.48285
Gluconate,1.94335,1.901386,1.893535,2.042975,1.571263,1.92994,2.079308,1.888641,1.730587,2.064168,2.070273,2.003387
Pyruvate,2.489449,3.211511,3.192088,6.239906,2.644108,3.867531,3.330348,3.115055,3.577759,2.946206,3.430683,3.398361
Succinate,2.01611,1.943942,1.942088,2.486917,1.457714,2.306357,2.161395,1.957083,2.436189,1.883235,2.37387,2.272134



R_PPC	(target)
svr 	(learning algorithm)
–> summary of all cv split scores for each combination of selection methods:


Score,average,std
Selection combo,Unnamed: 1_level_1,Unnamed: 2_level_1
v20_f20,0.676693,1.386075
v10_mi10,0.684595,1.43815
v20_mi10,0.689387,1.445374
v20_f10,0.701174,1.444732
v10_f20,0.707295,1.466566
v20_mi20,0.721436,1.382097
v10_mi20,0.749085,1.473188
v10_f10,0.76963,1.583574
v20,0.805534,1.537976
v10,0.810555,1.542202



R_PPC	(target)
svr	(learning algorithm)
–> all individual cv-split scores for each combination of selection methods:


Selection combo,noSelection,v10,v10_mi10,v10_f10,v10_mi20,v10_f20,v20,v20_mi10,v20_f10,v20_mi20,v20_f20
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Acetate,0.138099,0.136716,0.163056,0.090094,0.135089,0.023895,0.125696,0.172974,0.156418,0.130662,0.057391
Fructose,0.493919,0.493936,0.416633,0.449821,0.450945,0.432428,0.493497,0.398891,0.447605,0.44338,0.472705
Galactose,4.865352,4.865241,4.473471,4.943923,4.615491,4.570701,4.849091,4.500813,4.504883,4.346815,4.320053
Glucose,0.213863,0.213579,0.200918,0.20011,0.162262,0.15532,0.212938,0.137354,0.105216,0.143382,0.126279
Glycerol,0.505429,0.504983,0.2072,0.283427,0.544345,0.287715,0.497373,0.2072,0.31354,0.551325,0.353108
Gluconate,0.026508,0.0268,0.001192,0.020822,0.004012,0.028014,0.022455,0.011384,0.005647,0.037798,0.016568
Pyruvate,0.220266,0.218926,0.011635,0.163194,0.062998,0.148177,0.217032,0.005443,0.032484,0.103326,0.042668
Succinate,0.023355,0.024257,0.002654,0.00565,0.017541,0.012112,0.026192,0.081037,0.043595,0.014799,0.024774



R_PPC	(target)
svr	(learning algorithm)
–> all predictions for each combination of selection methods (+ actual values):


Selection combo,actual,noSelection,v10,v10_mi10,v10_f10,v10_mi20,v10_f20,v20,v20_mi10,v20_f10,v20_mi20,v20_f20
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Acetate,1.774145,2.019154,2.016699,2.063431,1.614306,2.013813,1.731753,1.997149,2.081027,1.496637,2.005959,1.672325
Fructose,3.545669,1.794395,1.794334,2.068425,1.950754,1.946766,2.012423,1.795893,2.131334,1.958608,1.973591,1.869614
Galactose,0.377386,2.213503,2.213461,2.065613,2.243155,2.119209,2.102306,2.207367,2.075931,2.077467,2.017815,2.007715
Glucose,2.453331,1.928656,1.92935,1.960413,1.962396,2.055248,2.072281,1.930924,2.116355,2.195202,2.101568,2.143528
Glycerol,1.376469,2.072176,2.071562,1.661673,1.766597,2.125743,1.7725,2.061087,1.661673,1.808047,2.13535,1.86251
Gluconate,1.94335,1.994865,1.995432,1.945667,1.902886,1.935554,1.88891,1.986989,1.921228,1.954325,2.016806,1.975548
Pyruvate,2.489449,1.941108,1.944443,2.518413,2.083187,2.332618,2.120569,1.949159,2.475898,2.408582,2.232224,2.383228
Succinate,2.01611,1.969024,1.967205,2.021462,2.027502,1.980745,2.04053,1.963305,2.17949,2.104003,2.045947,2.066057



R_AKGDH	(target)
lr 	(learning algorithm)
–> summary of all cv split scores for each combination of selection methods:


Score,average,std
Selection combo,Unnamed: 1_level_1,Unnamed: 2_level_1
v20_mi20,0.897167,1.588921
v10_f10,0.950969,1.374073
v20_f10,1.131259,1.403639
v20_mi10,1.136071,1.55643
v20_f20,1.168169,1.850398
v10_mi20,1.213662,2.03565
v10_f20,1.33137,1.737049
v10_mi10,1.453666,1.510682
v20,1.864618,2.690932
v10,1.906033,2.854543



R_AKGDH	(target)
lr	(learning algorithm)
–> all individual cv-split scores for each combination of selection methods:


Selection combo,noSelection,v10,v10_mi10,v10_f10,v10_mi20,v10_f20,v20,v20_mi10,v20_f10,v20_mi20,v20_f20
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Acetate,0.065811,0.055309,0.056992,0.23164,0.167866,0.14836,0.049365,0.104276,0.184567,0.000976,0.208926
Fructose,0.369301,0.364752,0.584504,0.27662,0.05485,0.393068,0.392594,0.185159,0.27662,0.311837,0.434541
Galactose,5.864356,5.650954,4.718317,1.206252,1.983844,4.617399,6.17137,4.962867,2.666416,0.516975,0.582298
Glucose,0.434356,0.448601,0.723245,0.577676,0.247323,0.253512,0.454946,0.723245,0.577676,0.461827,0.626921
Glycerol,0.419598,0.424372,1.55805,0.320816,0.03444,0.700639,0.440176,0.490756,0.555892,0.019984,0.906854
Gluconate,7.899587,7.849211,3.006315,4.483124,6.366645,4.000441,6.846248,2.00272,4.23217,5.073364,6.022993
Pyruvate,0.435497,0.435666,0.285514,0.465828,0.484081,0.512708,0.469603,0.487407,0.465828,0.448523,0.489898
Succinate,0.01627,0.019394,0.696387,0.045799,0.370246,0.02483,0.09264,0.132138,0.090906,0.343849,0.072918



R_AKGDH	(target)
lr	(learning algorithm)
–> all predictions for each combination of selection methods (+ actual values):


Selection combo,actual,noSelection,v10,v10_mi10,v10_f10,v10_mi20,v10_f20,v20,v20_mi10,v20_f10,v20_mi20,v20_f20
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Acetate,4.267163,3.986338,4.031149,4.510355,5.25561,3.550852,3.634085,4.056513,4.712124,5.054741,4.271329,3.375644
Fructose,3.874895,5.305896,5.288272,6.139787,4.946766,4.087432,5.397993,5.396157,4.592367,4.946766,5.083231,5.558695
Galactose,0.236073,1.620486,1.570107,1.349938,0.520836,0.704403,1.326113,1.692963,-0.935524,0.86554,0.358116,0.373537
Glucose,2.138073,1.209388,1.178931,0.591722,0.902959,1.609278,1.596047,1.165366,0.591722,0.902959,1.150654,0.79767
Glycerol,1.840459,1.068206,1.05942,-1.027068,1.25001,1.777073,0.550962,1.030333,0.937242,0.817363,1.877238,0.171431
Gluconate,0.182358,1.622913,1.613727,0.730585,0.999893,1.343369,0.911872,1.430828,0.547571,0.954129,1.107528,1.280701
Pyruvate,7.415655,4.186161,4.184903,5.298382,3.961233,3.82588,3.613589,3.933239,3.801211,3.961233,4.089565,3.78274
Succinate,2.351983,2.313715,2.306368,3.989874,2.244264,3.222795,2.293584,2.134096,2.041197,2.138173,1.543257,2.180481



R_AKGDH	(target)
svr 	(learning algorithm)
–> summary of all cv split scores for each combination of selection methods:


Score,average,std
Selection combo,Unnamed: 1_level_1,Unnamed: 2_level_1
v20_f10,2.246466,3.555317
v20_f20,2.412639,3.916047
v10_f20,2.487887,3.998263
v10_f10,2.60616,4.210618
v20_mi10,2.638108,4.133567
v20_mi20,2.644128,4.156369
v10_mi20,2.897619,4.563152
v10_mi10,2.957003,4.62455
noSelection,3.745276,5.809223
v10,3.749414,5.820828



R_AKGDH	(target)
svr	(learning algorithm)
–> all individual cv-split scores for each combination of selection methods:


Selection combo,noSelection,v10,v10_mi10,v10_f10,v10_mi20,v10_f20,v20,v20_mi10,v20_f10,v20_mi20,v20_f20
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Acetate,0.468189,0.467074,0.438292,0.239123,0.415811,0.238716,0.462485,0.381235,0.234539,0.345307,0.220518
Fructose,0.395049,0.39355,0.080575,0.10254,0.141966,0.061848,0.384294,0.0399,0.10254,0.098531,0.023781
Galactose,12.115133,12.142796,10.280032,10.724484,11.290308,9.31237,12.287833,9.734572,8.053399,10.103166,8.824999
Glucose,0.251719,0.242021,0.189701,0.15407,0.137171,0.110323,0.217153,0.189701,0.15407,0.161186,0.089013
Glycerol,0.605518,0.607538,0.230498,0.042509,0.144565,0.070615,0.617209,0.184004,0.075203,0.163076,0.056339
Gluconate,15.301002,15.322697,11.601799,8.989362,10.27481,9.50318,15.35866,9.847906,8.734054,9.565819,9.541962
Pyruvate,0.686793,0.685536,0.668013,0.493842,0.680988,0.540953,0.682023,0.631469,0.493842,0.600768,0.50869
Succinate,0.138805,0.134103,0.167117,0.103351,0.095331,0.065095,0.12739,0.096072,0.124082,0.115174,0.035812



R_AKGDH	(target)
svr	(learning algorithm)
–> all predictions for each combination of selection methods (+ actual values):


Selection combo,actual,noSelection,v10,v10_mi10,v10_f10,v10_mi20,v10_f20,v20,v20_mi10,v20_f10,v20_mi20,v20_f20
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Acetate,4.267163,2.269324,2.274081,2.396899,3.246784,2.492832,3.248522,2.293665,2.64037,3.266348,2.79368,3.326178
Fructose,3.874895,2.344121,2.34993,3.562676,3.477564,3.324791,3.635239,2.385796,3.720285,3.477564,3.493098,3.782745
Galactose,0.236073,3.096122,3.102653,2.662906,2.767828,2.901404,2.434467,3.136892,2.534138,2.137259,2.621152,2.319412
Glucose,2.138073,2.676267,2.655533,2.543668,1.808661,2.431356,1.902195,2.602363,2.543668,1.808661,2.4827,1.947757
Glycerol,1.840459,2.954889,2.958608,2.264681,1.918694,2.106525,1.970423,2.976406,2.17911,1.978867,2.140593,1.944149
Gluconate,0.182358,2.972623,2.976579,2.298042,1.821643,2.056055,1.915342,2.983137,1.978206,1.775085,1.926765,1.922414
Pyruvate,7.415655,2.322637,2.331958,2.461904,3.753494,2.365682,3.404135,2.358004,2.732898,3.753494,2.960567,3.643382
Succinate,2.351983,2.678449,2.667392,2.74504,2.595064,2.576199,2.505085,2.651602,2.126023,2.060145,2.081095,2.436211



R_ICDHyr	(target)
lr 	(learning algorithm)
–> summary of all cv split scores for each combination of selection methods:


Score,average,std
Selection combo,Unnamed: 1_level_1,Unnamed: 2_level_1
v20_mi20,0.359214,0.309566
v20_f10,0.469866,0.366077
v10_mi20,0.504877,0.540818
v20_mi10,0.51047,0.29292
v20_f20,0.540485,0.425168
v10_mi10,0.588769,0.352574
v10_f20,0.599622,0.80065
v10,0.710732,0.995799
noSelection,0.722889,1.024693
v20,0.745409,1.082459



R_ICDHyr	(target)
lr	(learning algorithm)
–> all individual cv-split scores for each combination of selection methods:


Selection combo,noSelection,v10,v10_mi10,v10_f10,v10_mi20,v10_f20,v20,v20_mi10,v20_f10,v20_mi20,v20_f20
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Acetate,0.100186,0.090597,0.184935,0.360476,0.060775,0.139412,0.083767,0.214179,0.12472,0.07558,0.230204
Fructose,0.311604,0.307652,0.730386,1.3471,0.514625,0.778011,0.328333,1.209526,1.187422,0.557744,0.868367
Galactose,3.335527,3.24591,0.203053,5.283743,1.856698,2.633422,3.549889,0.508551,0.200566,1.069707,1.389635
Glucose,0.286835,0.29599,0.66339,0.295846,0.21863,0.08515,0.298726,0.66339,0.295846,0.193387,0.096635
Glycerol,0.371585,0.376275,0.36363,0.869662,0.123804,0.232443,0.403831,0.36363,0.869662,0.063733,0.242776
Gluconate,0.989239,0.97811,1.340532,0.333968,0.629898,0.472032,0.818686,0.353294,0.625956,0.358364,0.877517
Pyruvate,0.385614,0.386452,0.470588,0.356758,0.348664,0.396717,0.419607,0.453067,0.356758,0.360838,0.396717
Succinate,0.002525,0.004868,0.753641,0.20148,0.285919,0.059791,0.06043,0.318119,0.097996,0.194357,0.222026



R_ICDHyr	(target)
lr	(learning algorithm)
–> all predictions for each combination of selection methods (+ actual values):


Selection combo,actual,noSelection,v10,v10_mi10,v10_f10,v10_mi20,v10_f20,v20,v20_mi10,v20_f10,v20_mi20,v20_f20
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Acetate,4.696417,4.225902,4.270936,3.827886,3.003469,4.410994,4.04168,4.303015,3.690544,4.11068,5.051373,3.615283
Fructose,4.566119,5.98894,5.970897,7.901146,10.71714,6.915959,8.118609,6.065326,10.088956,9.988028,7.112843,8.531187
Galactose,0.496177,2.151191,2.106725,0.395427,3.117851,1.417429,1.802822,2.257552,0.243846,0.396661,1.026942,1.185683
Glucose,2.977971,2.123786,2.096523,4.953527,2.096949,2.326897,2.724397,2.088373,4.953527,2.096949,2.40207,2.690196
Glycerol,2.464836,1.548941,1.537379,1.568547,0.321262,2.159679,1.891903,1.469459,1.568547,0.321262,2.307744,1.866433
Gluconate,1.154034,2.295648,2.282806,2.701053,1.539444,1.880957,1.698774,2.098825,1.561747,1.876408,1.567598,2.166718
Pyruvate,7.97932,4.902384,4.895694,4.22435,5.132635,5.197219,4.813785,4.631145,4.36415,5.132635,5.100077,4.813785
Succinate,3.038949,3.031275,3.024155,5.329225,3.651238,3.907844,3.22065,2.855304,4.005698,2.741144,2.448308,2.364225



R_ICDHyr	(target)
svr 	(learning algorithm)
–> summary of all cv split scores for each combination of selection methods:


Score,average,std
Selection combo,Unnamed: 1_level_1,Unnamed: 2_level_1
v20_f20,0.866323,1.417818
v20_f10,0.867634,1.465182
v20_mi10,0.959495,1.650628
v10_f20,0.983437,1.661281
v20_mi20,0.999043,1.705811
v10_f10,1.022578,1.833563
v10_mi20,1.099838,1.842766
v10_mi10,1.154877,1.98393
v10,1.353965,2.080971
noSelection,1.35408,2.076732



R_ICDHyr	(target)
svr	(learning algorithm)
–> all individual cv-split scores for each combination of selection methods:


Selection combo,noSelection,v10,v10_mi10,v10_f10,v10_mi20,v10_f20,v20,v20_mi10,v20_f10,v20_mi20,v20_f20
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Acetate,0.366492,0.365448,0.358756,0.323836,0.363548,0.230283,0.358721,0.310678,0.214229,0.33151,0.181262
Fructose,0.330512,0.328893,0.381401,0.26806,0.350166,0.21825,0.318914,0.380533,0.251393,0.376369,0.214738
Galactose,6.592371,6.602266,6.255335,5.807384,5.818205,5.228016,6.646155,5.218304,4.645305,5.413463,4.484874
Glucose,0.122997,0.115938,0.014275,0.17452,0.017098,0.082425,0.098029,0.014275,0.17452,0.024904,0.054093
Glycerol,0.491079,0.49223,0.011053,0.028162,0.021608,0.149713,0.496921,0.011053,0.028162,0.028571,0.147182
Gluconate,2.193906,2.196801,1.567011,1.055972,1.533377,1.464949,2.200395,1.21273,1.125751,1.182305,1.284413
Pyruvate,0.620669,0.619482,0.533547,0.455779,0.615521,0.454278,0.614531,0.462898,0.455779,0.546581,0.454278
Succinate,0.114616,0.110663,0.117641,0.066917,0.079178,0.039586,0.104739,0.065491,0.04593,0.088637,0.109744



R_ICDHyr	(target)
svr	(learning algorithm)
–> all predictions for each combination of selection methods (+ actual values):


Selection combo,actual,noSelection,v10,v10_mi10,v10_f10,v10_mi20,v10_f20,v20,v20_mi10,v20_f10,v20_mi20,v20_f20
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Acetate,4.696417,2.97522,2.980123,3.01155,3.17555,2.989046,3.614914,3.011712,3.237342,3.69031,3.139507,3.845135
Fructose,4.566119,3.056961,3.064356,2.824597,3.342126,2.96722,3.569565,3.109921,2.82856,3.418228,2.847572,3.585598
Galactose,0.496177,3.767163,3.772072,3.599933,3.37767,3.383039,3.090201,3.793849,3.085382,2.801073,3.182216,2.721471
Glucose,2.977971,3.344254,3.32323,3.020481,2.458257,3.02889,2.732513,3.2699,3.020481,2.458257,3.052136,2.816884
Glycerol,2.464836,3.675265,3.678104,2.492081,2.534251,2.518096,2.833855,3.689664,2.492081,2.534251,2.535258,2.827616
Gluconate,1.154034,3.685875,3.689216,2.962417,2.372661,2.923603,2.844634,3.693364,2.553565,2.453188,2.518453,2.63629
Pyruvate,7.97932,3.026803,3.036272,3.721982,4.342517,3.067883,4.35449,3.075779,4.285705,4.342517,3.617979,4.35449
Succinate,3.038949,3.387263,3.37525,3.396453,3.242305,3.279568,3.15925,3.357247,2.839927,2.89937,2.769586,2.705442
