# Building ML-models with PCA dimensionality reduction

Preprocessing and feature extraction are performed after splitting the data (therefore, only the samples in the training data are available for these steps). 

1. Models are built as pipelines of different steps. 
    - All include a standardization step.
    - Then a PCA step follows to extract features, but the different models keep a different number of the n first PCs.
    - The models are named after how many PCs they keep.
2. The models are evaluated using leave one out cross validation. 
    - Each model (which uses a given number of extracted PCs as features) is therefore trained and evaluated 8 times 
        - All of the steps (the training) in the model are performed on 7 of the samples to fit the model
            - Therefore, the maximum number of PCs kept is 7 (minimum of n_samples and n_features) 
        - The fitted model is then evaluated by predicting the flux of the target reaction in the 8th sample.
    - The average of these performances is reported and used to compare the model with models keeping a different number of PCs as features.

In [1]:
# import libraries
import pandas as pd
import numpy as np
from IPython.display import display

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import LeaveOneOut
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn import linear_model
from sklearn import svm

In [2]:
# import self-made functions by executing the shared methods notebook in this namespace
# (presumably the source of load_gerosa, clean_gexp_g, compare_models and
# get_models_preds, which are used below but not defined in this notebook - TODO confirm)
# NOTE(review): the path is absolute and machine-specific; adjust it when running elsewhere
%run /Users/mariekececilia/Documents/master_thesis_code/methods.ipynb

In [3]:
#set a random seed to make reproducible results
#(passed as random_state to every PCA step created in get_models)
seed = 0

## Load, clean and prepare data

In [4]:
# Load the transcriptional and fluxomic data sets
gexp, flux = load_gerosa()

# Clean the transcriptional data (remove duplicates)
gexp, groups = clean_gexp_g(gexp)

# Features: the natural-log-transformed transcriptional data
X = np.log(gexp)

# Targets: the fluxes of the selected reactions, keyed by reaction label
target_labels = ['R_PPC', 'R_AKGDH', 'R_ICDHyr']
targets = {label: flux[label] for label in target_labels}

## Create models/pipelines

Choose learning algorithms:

In [5]:
# Learning algorithms to compare, keyed by the short name used in the results.
# Every estimator is created with its scikit-learn default hyperparameters
# (the '_a1' suffix presumably refers to the default alpha=1 - TODO confirm).
algorithms = {
    'lr': linear_model.LinearRegression(),
    'ridge_a1': linear_model.Ridge(),
    'lasso_a1': linear_model.Lasso(),
    'svr': svm.SVR(),
}

Get the models/pipelines:

In [6]:
# get a dict of models to evaluate, name each by the dimensions they keep
def get_models(max_n_components, model):
    """Build one standardize -> PCA -> estimator pipeline per number of PCs kept.

    Parameters
    ----------
    max_n_components : int
        Largest number of leading principal components to keep. Pipelines
        are built for 1, 2, ..., max_n_components.
    model : estimator
        Learning algorithm used as the final step of every pipeline.

    Returns
    -------
    dict
        Maps str(n_components) to the corresponding Pipeline, inserted in
        increasing order of n_components.
    """
    # local import keeps this cell independent of the import cell
    from sklearn.base import clone

    models = dict()
    for i in range(1, max_n_components+1):
        # Give every pipeline its own unfitted copy of the estimator. Reusing
        # the very same instance would make all pipelines share one final
        # step, so fitting one pipeline would overwrite the state of the rest.
        # Objects without get_params (e.g. 'passthrough') are used as-is.
        final_step = clone(model) if hasattr(model, 'get_params') else model
        steps = [
            ('standardize', StandardScaler()),
            ('pca', PCA(n_components=i, random_state=seed)),
            ('model', final_step)]
        models[str(i)] = Pipeline(steps=steps)
    return models

In [7]:
# Upper bound on the number of PCs: LOOCV holds one sample out, so every PCA
# is fitted on one sample less than X contains
# NOTE(review): in the results below, the scores for 6 and 7 PCs are identical
# for lr/ridge/lasso - presumably because standardization centers the training
# data, leaving at most n_train-1 informative components; confirm whether the
# largest setting is meaningful.
n_train_samples = X.shape[0] - 1
max_n_components = min(n_train_samples, X.shape[1])

# One dict of pipelines (keyed by the number of PCs kept) per learning algorithm
models = {
    name: get_models(max_n_components, algorithm)
    for name, algorithm in algorithms.items()
}

## Learning

In [8]:
# Evaluate every (target reaction, learning algorithm) combination.
# Results are kept in nested dicts:
# reaction -> learning algorithm -> scores/preds of the different n first PCs kept
scores = {}
preds = {}
for y_name, y in targets.items():
    scores_per_algorithm = scores[y_name] = {}
    preds_per_algorithm = preds[y_name] = {}
    for model_name, pipelines in models.items():
        # scores first, then predictions, for the same set of pipelines
        scores_per_algorithm[model_name] = compare_models(pipelines, X, y)
        preds_per_algorithm[model_name] = get_models_preds(pipelines, X, y)

### Save data to use in other notebooks

Scores and predictions are visualized in the Notebook 'compare_models', which also merges the results with the results from all other models built.

In [9]:
# give the results notebook-specific names before persisting them, so they can
# be told apart from the results of the other model-building notebooks
scores_pca = scores
preds_pca = preds
# persist both dicts with IPython's %store magic so that another notebook can
# restore them (with %store -r)
%store scores_pca
%store preds_pca

Stored 'scores_pca' (dict)
Stored 'preds_pca' (dict)


### Scores and predictions

In [10]:
# For every target reaction and learning algorithm, show three tables:
# the summary of the cv scores, the individual cv-split scores and the predictions
for target_name in targets:
    for algorithm_name in algorithms:
        # header lines shared by the three tables of this combination
        target_line = target_name + '\t(target)'
        algorithm_line = algorithm_name + '\t(learning algorithm)'

        # 1) summary table (element 1 of the scores entry), sorted by the 'average' column
        print('', target_line, algorithm_line,
              '–> summary of all cv split scores for each number of PCs kept:',
              sep='\n')
        summary = scores[target_name][algorithm_name][1].sort_values(by='average')
        summary.index.name = 'PCs kept'
        summary.columns.name = 'Score'
        display(summary)

        # 2) individual cv-split scores (element 0 of the scores entry)
        print('', target_line, algorithm_line,
              '–> all individual cv-split scores for each number of PCs kept:',
              sep='\n')
        split_scores = scores[target_name][algorithm_name][0]
        split_scores.columns.name = 'PCs kept'
        display(split_scores)

        # 3) predictions for every held-out sample, next to the actual values
        print('', target_line, algorithm_line,
              '–> all predictions for each number of PCs kept (+ actual values):',
              sep='\n')
        predictions = preds[target_name][algorithm_name]
        predictions.index.name = 'Test set'
        predictions.columns.name = 'PCs kept'
        display(predictions)


R_PPC	(target)
lr	(learning algorithm)
–> summary of all cv split scores for each number of PCs kept:


Score,average,std
PCs kept,Unnamed: 1_level_1,Unnamed: 2_level_1
7,0.297126,0.439747
6,0.297126,0.439747
5,0.310365,0.471167
4,0.314939,0.467331
2,0.358261,0.557075
3,0.371358,0.569436
1,0.824399,1.576637



R_PPC	(target)
lr	(learning algorithm)
–> all individual cv-split scores for each number of PCs kept:


PCs kept,1,2,3,4,5,6,7
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Acetate,0.027282,0.178716,0.180808,0.144477,0.143887,0.143348,0.143348
Fructose,0.48416,0.291229,0.291422,0.280306,0.277259,0.27284,0.27284
Galactose,4.973507,1.816175,1.86415,1.53283,1.538454,1.441175,1.441175
Glucose,0.077964,0.089787,0.104066,0.057776,0.065356,0.066814,0.066814
Glycerol,0.491493,0.110607,0.110428,0.111963,0.092215,0.09162,0.09162
Gluconate,0.207011,0.094304,0.125406,0.092627,0.071792,0.069753,0.069753
Pyruvate,0.222655,0.252963,0.253356,0.255478,0.249557,0.248495,0.248495
Succinate,0.111116,0.032302,0.041228,0.044055,0.044397,0.042965,0.042965



R_PPC	(target)
lr	(learning algorithm)
–> all predictions for each number of PCs kept (+ actual values):


PCs kept,actual,1,2,3,4,5,6,7
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Acetate,1.774145,1.725743,1.457076,1.453365,1.517822,1.518869,1.519825,1.519825
Fructose,3.545669,1.828996,2.513066,2.512384,2.551796,2.562601,2.578267,2.578267
Galactose,0.377386,2.25432,1.062786,1.080891,0.955855,0.957978,0.921266,0.921266
Glucose,2.453331,2.262061,2.233053,2.198022,2.311587,2.292991,2.289415,2.289415
Glycerol,1.376469,2.052993,1.528716,1.528469,1.530583,1.5034,1.50258,1.50258
Gluconate,1.94335,2.345646,2.126617,2.187057,2.123357,2.082868,2.078906,2.078906
Pyruvate,2.489449,1.935161,3.119188,3.120166,3.125447,3.110709,3.108064,3.108064
Succinate,2.01611,2.240133,1.950986,1.93299,1.92729,1.926601,1.929489,1.929489



R_PPC	(target)
ridge_a1	(learning algorithm)
–> summary of all cv split scores for each number of PCs kept:


Score,average,std
PCs kept,Unnamed: 1_level_1,Unnamed: 2_level_1
6,0.297202,0.439933
7,0.297202,0.439933
5,0.310433,0.471336
4,0.315006,0.467501
2,0.358313,0.557218
3,0.371406,0.569576
1,0.824394,1.576637



R_PPC	(target)
ridge_a1	(learning algorithm)
–> all individual cv-split scores for each number of PCs kept:


PCs kept,1,2,3,4,5,6,7
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Acetate,0.027265,0.17868,0.180772,0.144453,0.143863,0.143324,0.143324
Fructose,0.484162,0.291254,0.291446,0.280335,0.277289,0.272873,0.272873
Galactose,4.973501,1.816612,1.864574,1.533359,1.538981,1.441754,1.441754
Glucose,0.077978,0.0898,0.104075,0.057801,0.065378,0.066834,0.066834
Glycerol,0.491496,0.110654,0.110474,0.11201,0.092269,0.091674,0.091674
Gluconate,0.206993,0.094299,0.125391,0.092623,0.071797,0.069759,0.069759
Pyruvate,0.222655,0.252903,0.253295,0.255416,0.249498,0.248437,0.248437
Succinate,0.111104,0.032299,0.041221,0.044048,0.044389,0.042958,0.042958



R_PPC	(target)
ridge_a1	(learning algorithm)
–> all predictions for each number of PCs kept (+ actual values):


PCs kept,actual,1,2,3,4,5,6,7
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Acetate,1.774145,1.725773,1.45714,1.45343,1.517865,1.518912,1.519867,1.519867
Fructose,3.545669,1.828991,2.512978,2.512296,2.551695,2.562495,2.578151,2.578151
Galactose,0.377386,2.254317,1.062951,1.081051,0.956055,0.958177,0.921485,0.921485
Glucose,2.453331,2.262026,2.233022,2.198001,2.311527,2.292938,2.289365,2.289365
Glycerol,1.376469,2.052997,1.52878,1.528533,1.530646,1.503475,1.502656,1.502656
Gluconate,1.94335,2.345611,2.126607,2.187029,2.12335,2.082878,2.078917,2.078917
Pyruvate,2.489449,1.93516,3.119037,3.120014,3.125293,3.110562,3.107919,3.107919
Succinate,2.01611,2.240108,1.950993,1.933003,1.927305,1.926617,1.929503,1.929503



R_PPC	(target)
lasso_a1	(learning algorithm)
–> summary of all cv split scores for each number of PCs kept:


Score,average,std
PCs kept,Unnamed: 1_level_1,Unnamed: 2_level_1
5,0.330995,0.530861
6,0.330995,0.530861
7,0.330995,0.530861
4,0.333775,0.529695
2,0.375456,0.612371
3,0.381382,0.616391
1,0.821361,1.572156



R_PPC	(target)
lasso_a1	(learning algorithm)
–> all individual cv-split scores for each number of PCs kept:


PCs kept,1,2,3,4,5,6,7
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Acetate,0.02209,0.141104,0.141104,0.110913,0.110913,0.110913,0.110913
Fructose,0.485246,0.302909,0.302909,0.294605,0.294605,0.294605,0.294605
Galactose,4.958172,1.981581,1.99919,1.718211,1.718211,1.718211,1.718211
Glucose,0.082261,0.082261,0.086606,0.048069,0.050059,0.050059,0.050059
Glycerol,0.492732,0.128537,0.128537,0.129803,0.118773,0.118773,0.118773
Gluconate,0.201429,0.103492,0.12151,0.095368,0.082163,0.082163,0.082163
Pyruvate,0.222826,0.233408,0.233408,0.233914,0.233914,0.233914,0.233914
Succinate,0.106133,0.030358,0.03779,0.039319,0.039319,0.039319,0.039319



R_PPC	(target)
lasso_a1	(learning algorithm)
–> all predictions for each number of PCs kept (+ actual values):


PCs kept,actual,1,2,3,4,5,6,7
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Acetate,1.774145,1.734954,1.523807,1.523807,1.577369,1.577369,1.577369,1.577369
Fructose,3.545669,1.825148,2.471652,2.471652,2.501098,2.501098,2.501098,2.501098
Galactose,0.377386,2.248532,1.125208,1.131853,1.025816,1.025816,1.025816,1.025816
Glucose,2.453331,2.251517,2.251517,2.240857,2.335401,2.33052,2.33052,2.33052
Glycerol,1.376469,2.054698,1.553396,1.553396,1.555138,1.539956,1.539956,1.539956
Gluconate,1.94335,2.334797,2.144471,2.179487,2.128684,2.103021,2.103021,2.103021
Pyruvate,2.489449,1.934734,3.070505,3.070505,3.071765,3.071765,3.071765,3.071765
Succinate,2.01611,2.230085,1.954906,1.939922,1.936838,1.936838,1.936838,1.936838



R_PPC	(target)
svr	(learning algorithm)
–> summary of all cv split scores for each number of PCs kept:


Score,average,std
PCs kept,Unnamed: 1_level_1,Unnamed: 2_level_1
5,0.555874,1.081833
4,0.561193,1.107686
2,0.569455,1.125718
6,0.573237,1.094541
7,0.573285,1.094684
3,0.575714,1.116184
1,0.959012,1.789207



R_PPC	(target)
svr	(learning algorithm)
–> all individual cv-split scores for each number of PCs kept:


PCs kept,1,2,3,4,5,6,7
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Acetate,0.020946,0.06527,0.092905,0.003366,0.007855,0.004729,0.004729
Fructose,0.481557,0.347972,0.348467,0.369264,0.366435,0.360674,0.360676
Galactose,5.655403,3.538854,3.519563,3.476416,3.400776,3.450634,3.45106
Glucose,0.103762,0.173691,0.202437,0.169149,0.190934,0.194098,0.194099
Glycerol,0.738906,0.107656,0.163024,0.262735,0.292774,0.347224,0.347226
Gluconate,0.095119,0.125645,0.113107,0.095052,0.08244,0.087782,0.087781
Pyruvate,0.387916,0.155129,0.142135,0.068597,0.071387,0.062178,0.06213
Succinate,0.188487,0.041418,0.024074,0.044962,0.034389,0.078581,0.078579



R_PPC	(target)
svr	(learning algorithm)
–> all predictions for each number of PCs kept (+ actual values):


PCs kept,actual,1,2,3,4,5,6,7
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Acetate,1.774145,1.736984,1.658346,1.609318,1.780117,1.760209,1.765756,1.765756
Fructose,3.545669,1.838228,2.311876,2.310121,2.236381,2.246411,2.266837,2.26683
Galactose,0.377386,2.511658,1.712901,1.705621,1.689338,1.660792,1.679608,1.679769
Glucose,2.453331,2.198768,2.02721,1.956686,2.038352,1.984908,1.977145,1.977142
Glycerol,1.376469,2.39355,1.524654,1.600866,1.738115,1.779463,1.854411,1.854415
Gluconate,1.94335,2.1282,2.187523,2.163157,2.128069,2.103561,2.113942,2.11394
Pyruvate,2.489449,1.523752,2.875634,2.843287,2.660216,2.667162,2.644238,2.644118
Succinate,2.01611,2.396121,1.932606,1.967574,1.925462,1.946779,1.857683,1.857687



R_AKGDH	(target)
lr	(learning algorithm)
–> summary of all cv split scores for each number of PCs kept:


Score,average,std
PCs kept,Unnamed: 1_level_1,Unnamed: 2_level_1
3,2.003519,3.151956
5,2.165815,3.567045
4,2.186779,3.392586
7,2.205597,3.612455
6,2.205597,3.612455
2,2.254454,3.628094
1,4.519427,7.053971



R_AKGDH	(target)
lr	(learning algorithm)
–> all individual cv-split scores for each number of PCs kept:


PCs kept,1,2,3,4,5,6,7
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Acetate,0.545344,0.08095,0.098882,0.133979,0.097933,0.083067,0.083067
Fructose,0.344282,0.370012,0.360384,0.352798,0.394945,0.379356,0.379356
Galactose,13.964714,5.767137,5.10768,5.861589,5.058053,5.414604,5.414604
Glucose,0.649797,0.228263,0.362442,0.505896,0.489197,0.467602,0.467602
Glycerol,0.597623,0.339039,0.239717,0.241592,0.192654,0.170232,0.170232
Gluconate,19.096933,10.590298,9.216472,9.761558,10.634045,10.648603,10.648603
Pyruvate,0.71373,0.435948,0.432218,0.435435,0.455691,0.453939,0.453939
Succinate,0.242992,0.223983,0.210354,0.201384,0.003999,0.027376,0.027376



R_AKGDH	(target)
lr	(learning algorithm)
–> all predictions for each number of PCs kept (+ actual values):


PCs kept,actual,1,2,3,4,5,6,7
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Acetate,4.267163,1.94009,3.921734,3.845217,3.695451,3.849267,3.912702,3.912702
Fructose,3.874895,2.540839,5.308654,5.271345,5.241952,5.405266,5.344859,5.344859
Galactose,0.236073,3.532758,1.597535,1.441855,1.619832,1.43014,1.514312,1.514312
Glucose,2.138073,3.527386,1.650029,1.363145,1.056431,1.092134,1.138306,1.138306
Glycerol,1.840459,2.940359,1.216471,1.399269,1.395818,1.485888,1.527153,1.527153
Gluconate,0.182358,3.664842,2.113587,1.863058,1.962459,2.121564,2.124219,2.124219
Pyruvate,7.415655,2.122882,4.182816,4.210474,4.186617,4.036408,4.049403,4.049403
Succinate,2.351983,2.923496,1.82518,1.857233,1.878331,2.36139,2.287595,2.287595



R_AKGDH	(target)
ridge_a1	(learning algorithm)
–> summary of all cv split scores for each number of PCs kept:


Score,average,std
PCs kept,Unnamed: 1_level_1,Unnamed: 2_level_1
3,2.003776,3.152436
5,2.166011,3.567366
4,2.186976,3.392991
6,2.205787,3.612752
7,2.205787,3.612752
2,2.254636,3.628427
1,4.519366,7.053877



R_AKGDH	(target)
ridge_a1	(learning algorithm)
–> all individual cv-split scores for each number of PCs kept:


PCs kept,1,2,3,4,5,6,7
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Acetate,0.54533,0.080996,0.098923,0.134008,0.097978,0.083121,0.083121
Fructose,0.344279,0.369928,0.360302,0.352719,0.394846,0.379267,0.379267
Galactose,13.964563,5.768137,5.10886,5.862528,5.059341,5.415699,5.415699
Glucose,0.649765,0.228194,0.362333,0.505737,0.489045,0.467464,0.467464
Glycerol,0.597622,0.338931,0.239641,0.241515,0.192597,0.170187,0.170187
Gluconate,19.096651,10.590985,9.217588,9.762487,10.634626,10.649176,10.649176
Pyruvate,0.71373,0.435983,0.432254,0.43547,0.455717,0.453966,0.453966
Succinate,0.242989,0.223935,0.210311,0.201344,0.003942,0.027418,0.027418



R_AKGDH	(target)
ridge_a1	(learning algorithm)
–> all predictions for each number of PCs kept (+ actual values):


PCs kept,actual,1,2,3,4,5,6,7
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Acetate,4.267163,1.940152,3.921541,3.845044,3.695328,3.849075,3.912472,3.912472
Fructose,3.874895,2.540848,5.308327,5.271028,5.241645,5.404883,5.344514,5.344514
Galactose,0.236073,3.532722,1.597771,1.442134,1.620054,1.430444,1.51457,1.51457
Glucose,2.138073,3.527319,1.650179,1.363379,1.05677,1.092459,1.138602,1.138602
Glycerol,1.840459,2.940358,1.21667,1.39941,1.39596,1.485993,1.527237,1.527237
Gluconate,0.182358,3.664791,2.113712,1.863262,1.962629,2.12167,2.124324,2.124324
Pyruvate,7.415655,2.122882,4.182554,4.210205,4.186356,4.036214,4.0492,4.0492
Succinate,2.351983,2.923489,1.825293,1.857335,1.878425,2.361253,2.287497,2.287497



R_AKGDH	(target)
lasso_a1	(learning algorithm)
–> summary of all cv split scores for each number of PCs kept:


Score,average,std
PCs kept,Unnamed: 1_level_1,Unnamed: 2_level_1
3,2.070428,3.259785
5,2.213581,3.623728
6,2.219154,3.637143
7,2.219154,3.637143
4,2.233724,3.472188
2,2.297574,3.689924
1,4.507258,7.035458



R_AKGDH	(target)
lasso_a1	(learning algorithm)
–> all individual cv-split scores for each number of PCs kept:


PCs kept,1,2,3,4,5,6,7
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Acetate,0.543186,0.092271,0.108389,0.140933,0.108281,0.104367,0.104367
Fructose,0.343288,0.361311,0.352758,0.347744,0.384953,0.374466,0.374466
Galactose,13.940199,6.031555,5.420641,6.094073,5.359266,5.452316,5.452316
Glucose,0.644865,0.215056,0.337836,0.472394,0.46211,0.448717,0.448717
Glycerol,0.596697,0.327482,0.240999,0.242673,0.200254,0.180122,0.180122
Gluconate,19.03744,10.688205,9.453803,9.928162,10.719346,10.719346,10.719346
Pyruvate,0.713672,0.442398,0.439029,0.441704,0.459179,0.458638,0.458638
Succinate,0.23872,0.222316,0.209968,0.20211,0.015259,0.015259,0.015259



R_AKGDH	(target)
lasso_a1	(learning algorithm)
–> all predictions for each number of PCs kept (+ actual values):


PCs kept,actual,1,2,3,4,5,6,7
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Acetate,4.267163,1.949302,3.873426,3.804649,3.665777,3.805111,3.821813,3.821813
Fructose,3.874895,2.544688,5.274937,5.241795,5.222368,5.366545,5.325911,5.325911
Galactose,0.236073,3.52697,1.659957,1.515737,1.674716,1.501248,1.523214,1.523214
Glucose,2.138073,3.516842,1.678268,1.415755,1.128061,1.150048,1.178683,1.178683
Glycerol,1.840459,2.938655,1.237741,1.39691,1.39383,1.471899,1.508952,1.508952
Gluconate,0.182358,3.653993,2.131441,1.906338,1.992841,2.13712,2.13712,2.13712
Pyruvate,7.415655,2.123308,4.134986,4.159964,4.140128,4.01054,4.014551,4.014551
Succinate,2.351983,2.913448,1.8291,1.858142,1.876624,2.316095,2.316095,2.316095



R_AKGDH	(target)
svr	(learning algorithm)
–> summary of all cv split scores for each number of PCs kept:


Score,average,std
PCs kept,Unnamed: 1_level_1,Unnamed: 2_level_1
6,2.86889,4.480601
7,2.86896,4.480721
5,2.910451,4.566049
3,2.926301,4.640078
4,2.942467,4.664936
2,3.156404,5.023604
1,3.654903,5.524426



R_AKGDH	(target)
svr	(learning algorithm)
–> all individual cv-split scores for each number of PCs kept:


PCs kept,1,2,3,4,5,6,7
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Acetate,0.535921,0.455025,0.446499,0.416995,0.465571,0.475807,0.475807
Fructose,0.36651,0.073458,0.126892,0.197882,0.241505,0.261403,0.261405
Galactose,13.128342,12.250591,11.200498,11.401836,10.970677,10.476017,10.476488
Glucose,0.235563,0.127746,0.133573,0.039227,0.041363,0.057053,0.057034
Glycerol,0.936357,0.28228,0.177241,0.21498,0.240198,0.208453,0.20846
Gluconate,13.296048,11.443542,10.712898,10.620524,10.655075,10.770883,10.770974
Pyruvate,0.726963,0.488796,0.504702,0.535806,0.570614,0.578938,0.578957
Succinate,0.013515,0.129795,0.108106,0.112484,0.098601,0.122564,0.122556



R_AKGDH	(target)
svr	(learning algorithm)
–> all predictions for each number of PCs kept (+ actual values):


PCs kept,actual,1,2,3,4,5,6,7
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Acetate,4.267163,1.980301,2.325497,2.361881,2.487775,2.280496,2.236816,2.236816
Fructose,3.874895,2.454705,3.590254,3.383202,3.108123,2.939088,2.861987,2.861979
Galactose,0.236073,3.335313,3.1281,2.880202,2.927733,2.825948,2.709172,2.709283
Glucose,2.138073,2.641724,2.411204,1.852485,2.054203,2.049636,2.01609,2.01613
Glycerol,1.840459,3.563786,2.359983,2.166664,2.23612,2.282533,2.224109,2.22412
Gluconate,0.182358,2.607003,2.269183,2.135944,2.119099,2.125399,2.146518,2.146535
Pyruvate,7.415655,2.024746,3.790913,3.672962,3.442299,3.184176,3.122451,3.12231
Succinate,2.351983,2.383771,2.046708,2.097719,2.087422,2.120075,2.063714,2.063734



R_ICDHyr	(target)
lr	(learning algorithm)
–> summary of all cv split scores for each number of PCs kept:


Score,average,std
PCs kept,Unnamed: 1_level_1,Unnamed: 2_level_1
3,0.709891,0.941549
5,0.730105,0.970504
6,0.739504,1.008534
7,0.739504,1.008534
2,0.770457,1.047696
4,0.780035,1.052047
1,1.57758,2.346282



R_ICDHyr	(target)
lr	(learning algorithm)
–> all individual cv-split scores for each number of PCs kept:


PCs kept,1,2,3,4,5,6,7
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Acetate,0.453144,0.101326,0.116673,0.148836,0.119222,0.103607,0.103607
Fructose,0.300202,0.315971,0.30853,0.304411,0.335285,0.321733,0.321733
Galactose,7.440081,3.319505,3.031746,3.395997,3.038531,3.155384,3.155384
Glucose,0.401843,0.160241,0.249849,0.346713,0.328595,0.313202,0.313202
Glycerol,0.445135,0.255191,0.187814,0.189104,0.148298,0.133264,0.133264
Gluconate,2.734414,1.46029,1.246425,1.321339,1.464783,1.470396,1.470396
Pyruvate,0.652899,0.389207,0.385957,0.389481,0.405699,0.404011,0.404011
Succinate,0.192924,0.161925,0.152131,0.144402,0.000428,0.014433,0.014433



R_ICDHyr	(target)
lr	(learning algorithm)
–> all predictions for each number of PCs kept (+ actual values):


PCs kept,actual,1,2,3,4,5,6,7
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Acetate,4.696417,2.568265,4.220548,4.148474,3.997421,4.136501,4.209834,4.209834
Fructose,4.566119,3.195363,6.008882,5.974904,5.956096,6.09707,6.03519,6.03519
Galactose,0.496177,4.187778,2.143241,2.000461,2.181194,2.003828,2.061808,2.061808
Glucose,2.977971,4.174649,2.500779,2.233928,1.945468,1.999424,2.045263,2.045263
Glycerol,2.464836,3.562023,1.835832,2.001905,1.998725,2.099307,2.136363,2.136363
Gluconate,1.154034,4.309639,2.839258,2.59245,2.678903,2.844443,2.85092,2.85092
Pyruvate,7.97932,2.769629,4.87371,4.899644,4.871529,4.742116,4.755584,4.755584
Succinate,3.038949,3.625236,2.546869,2.576631,2.600118,3.037648,2.995088,2.995088



R_ICDHyr	(target)
ridge_a1	(learning algorithm)
–> summary of all cv split scores for each number of PCs kept:


Score,average,std
PCs kept,Unnamed: 1_level_1,Unnamed: 2_level_1
3,0.709964,0.941753
5,0.730178,0.970704
6,0.739572,1.008714
7,0.739572,1.008714
2,0.770513,1.04787
4,0.780086,1.052216
1,1.57756,2.346262



R_ICDHyr	(target)
ridge_a1	(learning algorithm)
–> all individual cv-split scores for each number of PCs kept:


PCs kept,1,2,3,4,5,6,7
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Acetate,0.45313,0.101357,0.1167,0.148853,0.119252,0.103647,0.103647
Fructose,0.3002,0.315898,0.308459,0.304341,0.335201,0.321657,0.321657
Galactose,7.440015,3.320018,3.032337,3.396472,3.039161,3.155951,3.155951
Glucose,0.401819,0.1602,0.249782,0.346613,0.328502,0.313119,0.313119
Glycerol,0.445135,0.25511,0.187754,0.189044,0.148254,0.133228,0.133228
Gluconate,2.734364,1.460386,1.246587,1.321475,1.464862,1.470472,1.470472
Pyruvate,0.652899,0.389241,0.385992,0.389514,0.405725,0.404038,0.404038
Succinate,0.192919,0.161891,0.152101,0.144375,0.000469,0.014467,0.014467



R_ICDHyr	(target)
ridge_a1	(learning algorithm)
–> all predictions for each number of PCs kept (+ actual values):


PCs kept,actual,1,2,3,4,5,6,7
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Acetate,4.696417,2.56833,4.220401,4.148346,3.997343,4.13636,4.209649,4.209649
Fructose,4.566119,3.195369,6.008546,5.974579,5.955776,6.096685,6.034843,6.034843
Galactose,0.496177,4.187745,2.143495,2.000755,2.18143,2.00414,2.062089,2.062089
Glucose,2.977971,4.174578,2.5009,2.234128,1.945767,1.999702,2.045512,2.045512
Glycerol,2.464836,3.562022,1.836032,2.002052,1.998873,2.099415,2.136452,2.136452
Gluconate,1.154034,4.309582,2.839368,2.592637,2.67906,2.844534,2.851007,2.851007
Pyruvate,7.97932,2.769629,4.873443,4.89937,4.871265,4.741908,4.755369,4.755369
Succinate,3.038949,3.625221,2.546972,2.576724,2.600203,3.037524,2.994986,2.994986



R_ICDHyr	(target)
lasso_a1	(learning algorithm)
–> summary of all cv split scores for each number of PCs kept:


Score,average,std
PCs kept,Unnamed: 1_level_1,Unnamed: 2_level_1
3,0.732127,0.990602
6,0.745582,1.0158
7,0.745582,1.0158
5,0.750366,1.013353
2,0.786803,1.087931
4,0.795299,1.088813
1,1.573648,2.342746



R_ICDHyr	(target)
lasso_a1	(learning algorithm)
–> all individual cv-split scores for each number of PCs kept:


PCs kept,1,2,3,4,5,6,7
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Acetate,0.451183,0.111612,0.125311,0.155154,0.128624,0.12296,0.12296
Fructose,0.299359,0.308587,0.302059,0.300122,0.326805,0.317583,0.317583
Galactose,7.428417,3.445311,3.180647,3.506608,3.181842,3.181842,3.181842
Glucose,0.398303,0.150758,0.232183,0.32266,0.309147,0.299644,0.299644
Glycerol,0.444444,0.246562,0.188772,0.189911,0.153973,0.140648,0.140648
Gluconate,2.725013,1.475761,1.283927,1.347665,1.478262,1.478262,1.478262
Pyruvate,0.652846,0.395201,0.392287,0.395307,0.408941,0.408379,0.408379
Succinate,0.189618,0.160635,0.151832,0.144964,0.015333,0.015333,0.015333



R_ICDHyr	(target)
lasso_a1	(learning algorithm)
–> all predictions for each number of PCs kept (+ actual values):


PCs kept,actual,1,2,3,4,5,6,7
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Acetate,4.696417,2.577476,4.17224,4.107906,3.967747,4.092345,4.118944,4.118944
Fructose,4.566119,3.199212,5.975165,5.945355,5.936512,6.058349,6.016242,6.016242
Galactose,0.496177,4.18199,2.205663,2.074343,2.236077,2.074936,2.074936,2.074936
Glucose,2.977971,4.164105,2.529018,2.286537,2.017098,2.057339,2.08564,2.08564
Glycerol,2.464836,3.560318,1.857102,1.999545,1.996736,2.085318,2.118162,2.118162
Gluconate,1.154034,4.29879,2.857112,2.635729,2.709284,2.859998,2.859998,2.859998
Pyruvate,7.97932,2.770056,4.825881,4.849134,4.82504,4.716247,4.720732,4.720732
Succinate,3.038949,3.615188,2.550789,2.577539,2.598411,2.992353,2.992353,2.992353



R_ICDHyr	(target)
svr	(learning algorithm)
–> summary of all cv split scores for each number of PCs kept:


Score,average,std
PCs kept,Unnamed: 1_level_1,Unnamed: 2_level_1
6,1.091856,1.800532
7,1.091891,1.800616
3,1.103454,1.897451
4,1.107072,1.913749
5,1.107148,1.863674
2,1.156868,2.057864
1,1.394295,2.161368



R_ICDHyr	(target)
svr	(learning algorithm)
–> all individual cv-split scores for each number of PCs kept:


PCs kept,1,2,3,4,5,6,7
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Acetate,0.425926,0.357285,0.350516,0.321279,0.37048,0.376885,0.376885
Fructose,0.319528,0.052121,0.097536,0.158103,0.204809,0.212399,0.212401
Galactose,6.937306,6.452645,5.989697,6.039125,5.901968,5.7148,5.715058
Glucose,0.121373,0.034484,0.159766,0.088752,0.091665,0.10031,0.100296
Glycerol,0.786603,0.222029,0.178172,0.179866,0.194272,0.196228,0.196233
Gluconate,1.85431,1.601872,1.505628,1.492379,1.504309,1.51592,1.515934
Pyruvate,0.663956,0.438789,0.448078,0.477175,0.515178,0.519759,0.519776
Succinate,0.045362,0.095722,0.09824,0.099901,0.074506,0.098551,0.098544



R_ICDHyr	(target)
svr	(learning algorithm)
–> all predictions for each number of PCs kept (+ actual values):


PCs kept,actual,1,2,3,4,5,6,7
Test set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Acetate,4.696417,2.696089,3.018456,3.050246,3.187557,2.95649,2.926409,2.926409
Fructose,4.566119,3.107117,4.32813,4.120756,3.844203,3.630937,3.596281,3.596272
Galactose,0.496177,3.938312,3.697834,3.46813,3.492655,3.424601,3.331732,3.33186
Glucose,2.977971,3.339415,3.080663,2.502192,2.71367,2.704997,2.679251,2.679292
Glycerol,2.464836,4.403684,3.012102,2.904001,2.908177,2.943686,2.948506,2.948518
Gluconate,1.154034,3.293969,3.002647,2.891579,2.87629,2.890057,2.903457,2.903472
Pyruvate,7.97932,2.681401,4.478079,4.403962,4.171791,3.868552,3.831999,3.831859
Succinate,3.038949,3.176801,2.748054,2.740403,2.735357,2.812529,2.739458,2.739479
