In [1]:
!pip install autogluon==0.8.2




In [2]:
# Colab-only: mount Google Drive at /content/drive so that files stored there
# (e.g. the dataset CSV under /content/drive/MyDrive) can be read by later cells.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split

In [4]:

from autogluon.tabular import TabularDataset, TabularPredictor
from autogluon.common.utils.utils import setup_outputdir
from autogluon.core.utils.loaders import load_pkl
from autogluon.core.utils.savers import save_pkl
import os.path

"""
@author: Lyle
"""

class MultilabelPredictor:
    """Tabular predictor for predicting multiple columns of a table.

    Creates one TabularPredictor per label, each of which can also be used individually.
    The TabularPredictor for a particular label is available via
    `multilabel_predictor.get_predictor(label_i)`.

    Parameters
    ----------
    labels : List[str]
        The ith element of this list is the column (i.e. `label`) predicted by the ith
        TabularPredictor stored in this object.
    path : str, default = None
        Path to directory where models and intermediate outputs should be saved.
        If unspecified, a time-stamped folder called "AutogluonModels/ag-[TIMESTAMP]" will be
        created in the working directory to store all models.
        Note: To call `fit()` twice and save all results of each fit, you must specify different
        `path` locations or don't specify `path` at all. Otherwise files from first `fit()` will
        be overwritten by second `fit()`.
        Caution: when predicting many labels, this directory may grow large as it needs to store
        many TabularPredictors.
    problem_types : List[str], default = None
        The ith element is the `problem_type` for the ith TabularPredictor stored in this object.
    eval_metrics : List[str], default = None
        The ith element is the `eval_metric` for the ith TabularPredictor stored in this object.
    consider_labels_correlation : bool, default = True
        Whether the predictions of multiple labels should account for label correlations or
        predict each label independently of the others. If True, the ordering of `labels` may
        affect resulting accuracy as each label is predicted conditional on the previous labels
        appearing earlier in this list (i.e. in an auto-regressive fashion). Set to False if
        during inference you may want to individually use just the ith TabularPredictor without
        predicting all the other labels.
    kwargs :
        Arguments passed into the initialization of each TabularPredictor.
    """

    multi_predictor_file = 'multilabel_predictor.pkl'

    def __init__(self, labels, path=None, problem_types=None, eval_metrics=None, consider_labels_correlation=True,
                 **kwargs):
        # Only set by `load()`; while None, the paths stored in `self.predictors` are used verbatim.
        self.model_root = None
        if len(labels) < 2:
            raise ValueError(
                "MultilabelPredictor is only intended for predicting MULTIPLE labels (columns), use TabularPredictor "
                "for predicting one label (column).")
        if (problem_types is not None) and (len(problem_types) != len(labels)):
            raise ValueError("If provided, `problem_types` must have same length as `labels`")
        if (eval_metrics is not None) and (len(eval_metrics) != len(labels)):
            raise ValueError("If provided, `eval_metrics` must have same length as `labels`")
        self.path = setup_outputdir(path, warn_if_exist=False)
        self.labels = labels
        self.consider_labels_correlation = consider_labels_correlation
        self.predictors = {}  # key = label, value = TabularPredictor or str path to the TabularPredictor for this label
        if eval_metrics is None:
            self.eval_metrics = {}
        else:
            self.eval_metrics = {labels[i]: eval_metrics[i] for i in range(len(labels))}
        for i in range(len(labels)):
            label = labels[i]
            # os.path.join (instead of string concatenation) works whether or not `self.path`
            # ends with a path separator.
            path_i = os.path.join(self.path, f"Predictor_{label}")
            problem_type = problem_types[i] if problem_types is not None else None
            eval_metric = eval_metrics[i] if eval_metrics is not None else None
            self.predictors[label] = TabularPredictor(label=label, problem_type=problem_type, eval_metric=eval_metric,
                                                      path=path_i, **kwargs)

    def fit(self, train_data, tuning_data=None, **kwargs):
        """ Fits a separate TabularPredictor to predict each of the labels.

            After each label is fitted, the in-memory predictor is replaced by its path on disk,
            and the whole MultilabelPredictor is saved once all labels are done.

            Parameters
            ----------
            train_data, tuning_data : str or autogluon.tabular.TabularDataset or pd.DataFrame
                See documentation for `TabularPredictor.fit()`.
            kwargs :
                Arguments passed into the `fit()` call for each TabularPredictor.
        """
        if isinstance(train_data, str):
            train_data = TabularDataset(train_data)
        if tuning_data is not None and isinstance(tuning_data, str):
            tuning_data = TabularDataset(tuning_data)
        train_data_og = train_data.copy()
        if tuning_data is not None:
            tuning_data_og = tuning_data.copy()
        else:
            tuning_data_og = None
        # Only record the metric chosen by each predictor when the caller did not supply metrics.
        save_metrics = len(self.eval_metrics) == 0
        for i in range(len(self.labels)):
            label = self.labels[i]
            predictor = self.get_predictor(label)
            if not self.consider_labels_correlation:
                # Independent mode: every other label is hidden from this predictor.
                labels_to_drop = [l for l in self.labels if l != label]
            else:
                # Auto-regressive mode: earlier labels stay in as features, later ones are hidden.
                labels_to_drop = [self.labels[j] for j in range(i + 1, len(self.labels))]
            train_data = train_data_og.drop(labels_to_drop, axis=1)
            if tuning_data is not None:
                tuning_data = tuning_data_og.drop(labels_to_drop, axis=1)
            print(f"Fitting TabularPredictor for label: {label} ...")
            predictor.fit(train_data=train_data, tuning_data=tuning_data, **kwargs)
            # Keep only the path; `get_predictor()` reloads the model from disk on demand.
            self.predictors[label] = predictor.path
            if save_metrics:
                self.eval_metrics[label] = predictor.eval_metric
        self.save()

    def predict(self, data, **kwargs):
        """ Returns DataFrame with label columns containing predictions for each label.

            Parameters
            ----------
            data : str or autogluon.tabular.TabularDataset or pd.DataFrame
                Data to make predictions for. If label columns are present in this data, they will be ignored.
                See documentation for `TabularPredictor.predict()`.
            kwargs :
                Arguments passed into the predict() call for each TabularPredictor.
        """
        return self._predict(data, as_proba=False, **kwargs)

    def predict_proba(self, data, **kwargs):
        """ Returns dict where each key is a label and the corresponding value is the `predict_proba()` output for just that label.

            Parameters
            ----------
            data : str or autogluon.tabular.TabularDataset or pd.DataFrame
                Data to make predictions for. See documentation for `TabularPredictor.predict()` and `TabularPredictor.predict_proba()`.
            kwargs :
                Arguments passed into the `predict_proba()` call for each TabularPredictor (also passed into a `predict()` call).
        """
        return self._predict(data, as_proba=True, **kwargs)

    def evaluate(self, data, **kwargs):
        """ Returns dict where each key is a label and the corresponding value is the `evaluate()` output for just that label.

            Parameters
            ----------
            data : str or autogluon.tabular.TabularDataset or pd.DataFrame
                Data to evaluate predictions of all labels for, must contain all labels as columns. See documentation for `TabularPredictor.evaluate()`.
            kwargs :
                Arguments passed into the `evaluate()` call for each TabularPredictor (also passed into the `predict()` call).
        """
        data = self._get_data(data)
        eval_dict = {}
        for label in self.labels:
            print(f"Evaluating TabularPredictor for label: {label} ...")
            predictor = self.get_predictor(label)
            eval_dict[label] = predictor.evaluate(data, **kwargs)
            if self.consider_labels_correlation:
                # Later predictors are evaluated on the predicted (not true) values of earlier labels.
                data[label] = predictor.predict(data, **kwargs)
        return eval_dict

    def save(self):
        """ Save MultilabelPredictor to disk. """
        # Replace any in-memory TabularPredictor by its path so only paths are pickled.
        for label in self.labels:
            if not isinstance(self.predictors[label], str):
                self.predictors[label] = self.predictors[label].path
        save_pkl.save(path=os.path.join(self.path, self.multi_predictor_file), object=self)
        print(f"MultilabelPredictor saved to disk. Load with: MultilabelPredictor.load('{self.path}')")

    @classmethod
    def load(cls, path):
        """ Load MultilabelPredictor from disk `path` previously specified when creating this MultilabelPredictor. """
        # NOTE: this unpickles the file found under `path`; only load predictors from trusted locations.
        predictor_instance = load_pkl.load(path=os.path.join(path, cls.multi_predictor_file))
        # Remember where we were loaded from so per-label predictors resolve relative to this folder.
        predictor_instance.model_root = path
        return predictor_instance

    def get_predictor(self, label):
        """ Returns TabularPredictor which is used to predict this label.

            If the predictor for `label` is currently stored as a path, it is loaded from disk.
            When this object came from `load()`, the per-label folder is resolved relative to
            the directory given to `load()`; otherwise the stored path is used as-is.
        """
        predictor = self.predictors[label]
        if isinstance(predictor, str):
            if self.model_root is None:
                # Not loaded via `load()` (e.g. right after `fit()`): the stored path is still valid.
                # Joining against a None root would raise a TypeError.
                return TabularPredictor.load(path=predictor)
            # normpath drops any trailing separator, so basename is the "Predictor_<label>" folder
            # regardless of whether the stored path ends with one.
            predictor_dir = os.path.basename(os.path.normpath(predictor))
            return TabularPredictor.load(path=os.path.join(self.model_root, predictor_dir, ""))
        return predictor

    def _get_data(self, data):
        """ Returns a TabularDataset loaded from `data` if it is a str, otherwise a copy of `data`. """
        if isinstance(data, str):
            return TabularDataset(data)
        return data.copy()

    def _predict(self, data, as_proba=False, **kwargs):
        """ Predicts every label in order, writing each prediction back into the working copy of
            `data` so that it is present as a column when the following labels are predicted.

            Returns the predicted label columns as a DataFrame, or, if `as_proba` is True,
            a dict mapping each label to its `predict_proba()` output.
        """
        data = self._get_data(data)
        if as_proba:
            predproba_dict = {}
        for label in self.labels:
            predictor = self.get_predictor(label)
            if as_proba:
                predproba_dict[label] = predictor.predict_proba(data, as_multiclass=True, **kwargs)
            data[label] = predictor.predict(data, **kwargs)
        if not as_proba:
            return data[self.labels]
        else:
            return predproba_dict


In [5]:
# -*- coding: utf-8 -*-
"""
Created on Sun Jul 10 12:05:43 2022

@author: Lyle
"""

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

ALL_STRUCTURAL_DATASET = "/content/drive/MyDrive/all_structural_data_aug.csv"


def one_hot_encode_material(data):
    """Replace the ``Material`` column with one-hot indicator columns.

    The result always contains the three columns ``Material=Steel``,
    ``Material=Aluminum`` and ``Material=Titanium`` (in that order) placed
    before the remaining columns, even when a material does not occur in
    ``data``. Values outside these three categories produce all-zero rows.

    Parameters
    ----------
    data : pd.DataFrame
        Frame containing a ``Material`` column. It is not modified.

    Returns
    -------
    pd.DataFrame
        New frame with the indicator columns followed by every other column
        of ``data``; the index is preserved.
    """
    material_dtype = pd.CategoricalDtype(categories=["Steel", "Aluminum", "Titanium"])
    # Cast with astype instead of assigning through `.loc[:, "Material"]`: the `.loc` form emits
    # a pandas warning and, where it writes in place, keeps the column's original dtype so the
    # fixed category list (and with it the columns for absent materials) is lost.
    materials = data["Material"].astype(material_dtype)
    mats_oh = pd.get_dummies(materials, prefix="Material=", prefix_sep="")
    remaining = data.drop(columns=["Material"])
    return pd.concat([mats_oh, remaining], axis=1)


def load_augmented_framed_dataset(dataset_path=None):
    """Load the structural dataset and return standardized features and targets.

    Parameters
    ----------
    dataset_path : str, optional
        CSV file to read. Defaults to ``ALL_STRUCTURAL_DATASET`` when omitted.

    Returns
    -------
    x : pd.DataFrame
        One-hot encoded and standardized feature columns.
    y : pd.DataFrame
        Standardized target columns; the two safety-factor columns are inverted
        (``1 / value``) and renamed with an " (Inverted)" suffix.
    x_scaler, y_scaler : StandardScaler
        The scalers fitted on the features and on the targets respectively.
    """
    if dataset_path is None:
        dataset_path = ALL_STRUCTURAL_DATASET
    reg_data = pd.read_csv(dataset_path, index_col=0)

    # Everything except the last 11 columns is treated as a feature.
    x = reg_data.iloc[:, :-11]
    x = one_hot_encode_material(x)
    x, x_scaler = scale(x)

    # Targets are the 10 columns before the final one (the final column is not used here).
    # Work on an explicit copy so the edits below never write into a slice of `reg_data`.
    y = reg_data.iloc[:, -11:-1].copy()

    for col in ['Sim 1 Safety Factor', 'Sim 3 Safety Factor']:
        # NOTE(review): a safety factor of exactly 0 would become inf here — confirm the data excludes it.
        y[col] = 1 / y[col]
        y = y.rename(columns={col: col + " (Inverted)"})
    # THIS MODEL HAS BEEN TRAINED ON SIGNED DISPLACEMENT VALUES INSTEAD OF MAGNITUDES
    # for col in ['Sim 1 Dropout X Disp.', 'Sim 1 Dropout Y Disp.', 'Sim 1 Bottom Bracket X Disp.',
    #             'Sim 1 Bottom Bracket Y Disp.', 'Sim 2 Bottom Bracket Z Disp.', 'Sim 3 Bottom Bracket Y Disp.',
    #             'Sim 3 Bottom Bracket X Rot.', 'Model Mass']:
    #     y[col] = [np.abs(val) for val in y[col].values]
    #     y.rename(columns={col: col + " Magnitude"}, inplace=True)
    y, y_scaler = scale(y)

    return x, y, x_scaler, y_scaler


def scale(v):
    """Standardize every column of a DataFrame.

    A fresh StandardScaler is fitted on ``v`` and applied to it; the scaled
    values are wrapped back into a DataFrame carrying the same column labels
    and index as ``v``.

    Returns
    -------
    (pd.DataFrame, StandardScaler)
        The scaled frame and the fitted scaler.
    """
    fitted_scaler = StandardScaler()
    fitted_scaler.fit(v)
    scaled_frame = pd.DataFrame(
        fitted_scaler.transform(v),
        index=v.index,
        columns=v.columns,
    )
    return scaled_frame, fitted_scaler

In [6]:
# Load the standardized features/targets, then hold out a test split with a fixed seed for repeatability.
# NOTE(review): both scalers are fitted inside load_augmented_framed_dataset() on the full dataset,
# i.e. before this split, so the held-out rows contribute to the scaling statistics — confirm this is intended.
x_scaled, y_scaled, x_scaler, y_scaler = load_augmented_framed_dataset()
x_train, x_test, y_train, y_test = train_test_split(x_scaled, y_scaled, random_state=2023)


  data.loc[:, "Material"] = pd.Categorical(data["Material"], categories=["Steel", "Aluminum", "Titanium"])


In [7]:
# Sanity check: features and targets must have the same number of rows within each split.
print(len(x_test), len(y_test))
len(x_train), len(y_train)

3713 3713


(11138, 11138)

In [8]:
# MultilabelPredictor.fit() takes one table holding both the features and every label column,
# so place the training features and targets side by side.
full_training_set = pd.concat([x_train, y_train], axis=1)
len(full_training_set)

11138

In [9]:
# Pass the label names as a plain list (the documented `labels` type) rather than a pandas Index,
# so the MultilabelPredictor that gets pickled on save does not carry a pandas object.
my_predictor = MultilabelPredictor(labels=list(y_scaled.columns))
my_predictor.fit(
    train_data=full_training_set
)

Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels/ag-20231012_051754/Predictor_Sim 1 Dropout X Disp./"
AutoGluon Version:  0.8.2
Python Version:     3.10.12
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Wed Aug 30 11:19:59 UTC 2023
Disk Space Avail:   48.77 GB / 83.96 GB (58.1%)
Train Data Rows:    11138
Train Data Columns: 39
Label Column: Sim 1 Dropout X Disp.
Preprocessing data ...
AutoGluon infers your prediction problem is: 'regression' (because dtype of label-column == float and many unique label-values observed).
	Label info (max, min, mean, stddev): (12.31357753874557, -5.036857510156917, -0.00664, 0.99526)
	If 'regression' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memo

Fitting TabularPredictor for label: Sim 1 Dropout X Disp. ...


	Stage 1 Generators:
		Fitting AsTypeFeatureGenerator...
			Note: Converting 5 features to boolean dtype as they only contain 2 unique values.
	Stage 2 Generators:
		Fitting FillNaFeatureGenerator...
	Stage 3 Generators:
		Fitting IdentityFeatureGenerator...
	Stage 4 Generators:
		Fitting DropUniqueFeatureGenerator...
	Stage 5 Generators:
		Fitting DropDuplicatesFeatureGenerator...
	Types of features in original data (raw dtype, special dtypes):
		('float', []) : 39 | ['Material=Steel', 'Material=Aluminum', 'Material=Titanium', 'SSB_Include', 'CSB_Include', ...]
	Types of features in processed data (raw dtype, special dtypes):
		('float', [])     : 34 | ['CS Length', 'BB Drop', 'Stack', 'SS E', 'ST Angle', ...]
		('int', ['bool']) :  5 | ['Material=Steel', 'Material=Aluminum', 'Material=Titanium', 'SSB_Include', 'CSB_Include']
	0.7s = Fit runtime
	39 features in original data used to generate 39 features in processed data.
	Train Data (Processed) Memory Usage: 3.09 MB (0.0% of availabl

Fitting TabularPredictor for label: Sim 1 Dropout Y Disp. ...


	Types of features in original data (raw dtype, special dtypes):
		('float', []) : 40 | ['Material=Steel', 'Material=Aluminum', 'Material=Titanium', 'SSB_Include', 'CSB_Include', ...]
	Types of features in processed data (raw dtype, special dtypes):
		('float', [])     : 35 | ['CS Length', 'BB Drop', 'Stack', 'SS E', 'ST Angle', ...]
		('int', ['bool']) :  5 | ['Material=Steel', 'Material=Aluminum', 'Material=Titanium', 'SSB_Include', 'CSB_Include']
	0.2s = Fit runtime
	40 features in original data used to generate 40 features in processed data.
	Train Data (Processed) Memory Usage: 3.17 MB (0.0% of available memory)
Data preprocessing and feature engineering runtime = 0.21s ...
AutoGluon will gauge predictive performance using evaluation metric: 'root_mean_squared_error'
	This metric's sign has been flipped to adhere to being higher_is_better. The metric score can be multiplied by -1 to get the metric value.
	To change this, specify the eval_metric parameter of Predictor()
Automatical

[1000]	valid_set's rmse: 0.410868
[2000]	valid_set's rmse: 0.407763


	-0.4077	 = Validation score   (-root_mean_squared_error)
	12.94s	 = Training   runtime
	0.34s	 = Validation runtime
Fitting model: LightGBM ...


[1000]	valid_set's rmse: 0.323699
[2000]	valid_set's rmse: 0.312753
[3000]	valid_set's rmse: 0.31383


	-0.3125	 = Validation score   (-root_mean_squared_error)
	19.67s	 = Training   runtime
	0.59s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-0.3175	 = Validation score   (-root_mean_squared_error)
	80.24s	 = Training   runtime
	0.16s	 = Validation runtime
Fitting model: CatBoost ...
	-0.3238	 = Validation score   (-root_mean_squared_error)
	21.58s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-0.3468	 = Validation score   (-root_mean_squared_error)
	19.33s	 = Training   runtime
	0.16s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
	-0.2421	 = Validation score   (-root_mean_squared_error)
	16.67s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: XGBoost ...
	-0.338	 = Validation score   (-root_mean_squared_error)
	7.77s	 = Training   runtime
	0.02s	 = Validation runtime
Fitting model: NeuralNetTorch ...
	-0.3628	 = Validation score   (-root_mean_squared_error)
	23.01s	 = Training   runtime
	0.04s	 = Validation 

Fitting TabularPredictor for label: Sim 1 Bottom Bracket X Disp. ...


	Stage 5 Generators:
		Fitting DropDuplicatesFeatureGenerator...
	Types of features in original data (raw dtype, special dtypes):
		('float', []) : 41 | ['Material=Steel', 'Material=Aluminum', 'Material=Titanium', 'SSB_Include', 'CSB_Include', ...]
	Types of features in processed data (raw dtype, special dtypes):
		('float', [])     : 36 | ['CS Length', 'BB Drop', 'Stack', 'SS E', 'ST Angle', ...]
		('int', ['bool']) :  5 | ['Material=Steel', 'Material=Aluminum', 'Material=Titanium', 'SSB_Include', 'CSB_Include']
	0.2s = Fit runtime
	41 features in original data used to generate 41 features in processed data.
	Train Data (Processed) Memory Usage: 3.26 MB (0.0% of available memory)
Data preprocessing and feature engineering runtime = 0.25s ...
AutoGluon will gauge predictive performance using evaluation metric: 'root_mean_squared_error'
	This metric's sign has been flipped to adhere to being higher_is_better. The metric score can be multiplied by -1 to get the metric value.
	To change t

[1000]	valid_set's rmse: 0.17849
[2000]	valid_set's rmse: 0.176705
[3000]	valid_set's rmse: 0.176325
[4000]	valid_set's rmse: 0.176285
[5000]	valid_set's rmse: 0.176222
[6000]	valid_set's rmse: 0.17615
[7000]	valid_set's rmse: 0.176122
[8000]	valid_set's rmse: 0.176135
[9000]	valid_set's rmse: 0.176134


	-0.1761	 = Validation score   (-root_mean_squared_error)
	36.31s	 = Training   runtime
	1.08s	 = Validation runtime
Fitting model: LightGBM ...
	-0.1156	 = Validation score   (-root_mean_squared_error)
	3.86s	 = Training   runtime
	0.02s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-0.065	 = Validation score   (-root_mean_squared_error)
	74.62s	 = Training   runtime
	0.16s	 = Validation runtime
Fitting model: CatBoost ...
	-0.1228	 = Validation score   (-root_mean_squared_error)
	214.39s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-0.0621	 = Validation score   (-root_mean_squared_error)
	16.53s	 = Training   runtime
	0.16s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
	-0.0569	 = Validation score   (-root_mean_squared_error)
	14.22s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: XGBoost ...
	-0.1141	 = Validation score   (-root_mean_squared_error)
	8.41s	 = Training   runtime
	0.05s	 = Validation runti

Fitting TabularPredictor for label: Sim 1 Bottom Bracket Y Disp. ...


	Types of features in processed data (raw dtype, special dtypes):
		('float', [])     : 37 | ['CS Length', 'BB Drop', 'Stack', 'SS E', 'ST Angle', ...]
		('int', ['bool']) :  5 | ['Material=Steel', 'Material=Aluminum', 'Material=Titanium', 'SSB_Include', 'CSB_Include']
	0.2s = Fit runtime
	42 features in original data used to generate 42 features in processed data.
	Train Data (Processed) Memory Usage: 3.35 MB (0.0% of available memory)
Data preprocessing and feature engineering runtime = 0.2s ...
AutoGluon will gauge predictive performance using evaluation metric: 'root_mean_squared_error'
	This metric's sign has been flipped to adhere to being higher_is_better. The metric score can be multiplied by -1 to get the metric value.
	To change this, specify the eval_metric parameter of Predictor()
Automatically generating train/validation split with holdout_frac=0.1, Train Rows: 10024, Val Rows: 1114
User-specified model hyperparameters to be fit:
{
	'NN_TORCH': {},
	'GBM': [{'extra_trees':

[1000]	valid_set's rmse: 0.35138
[2000]	valid_set's rmse: 0.347272
[3000]	valid_set's rmse: 0.346965
[4000]	valid_set's rmse: 0.346849
[5000]	valid_set's rmse: 0.346742
[6000]	valid_set's rmse: 0.34666
[7000]	valid_set's rmse: 0.346643
[8000]	valid_set's rmse: 0.346635
[9000]	valid_set's rmse: 0.346645


	-0.3466	 = Validation score   (-root_mean_squared_error)
	42.96s	 = Training   runtime
	0.88s	 = Validation runtime
Fitting model: LightGBM ...
	-0.2898	 = Validation score   (-root_mean_squared_error)
	3.25s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-0.2351	 = Validation score   (-root_mean_squared_error)
	86.91s	 = Training   runtime
	0.16s	 = Validation runtime
Fitting model: CatBoost ...
	-0.2848	 = Validation score   (-root_mean_squared_error)
	16.84s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-0.2471	 = Validation score   (-root_mean_squared_error)
	25.19s	 = Training   runtime
	0.27s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
	-0.071	 = Validation score   (-root_mean_squared_error)
	14.27s	 = Training   runtime
	0.04s	 = Validation runtime
Fitting model: XGBoost ...
	-0.3202	 = Validation score   (-root_mean_squared_error)
	5.86s	 = Training   runtime
	0.01s	 = Validation runtime

[1000]	valid_set's rmse: 0.269673


	-0.2695	 = Validation score   (-root_mean_squared_error)
	22.06s	 = Training   runtime
	0.3s	 = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
	-0.0691	 = Validation score   (-root_mean_squared_error)
	0.72s	 = Training   runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 239.18s ... Best model: "WeightedEnsemble_L2"
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("AutogluonModels/ag-20231012_051754/Predictor_Sim 1 Bottom Bracket Y Disp./")
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels/ag-20231012_051754/Predictor_Sim 2 Bottom Bracket Z Disp./"
AutoGluon Version:  0.8.2
Python Version:     3.10.12
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Wed Aug 30 11:19:59 UTC 2023
Disk Space Avail:   46.79 GB / 83.96 GB (55.7%)
Train Data Rows:    11138
Train Data Columns: 43
Label Column: Sim 2 Bottom Bracket Z Disp.
Preprocessing data ...
AutoGluon infers your p

Fitting TabularPredictor for label: Sim 2 Bottom Bracket Z Disp. ...


	Stage 4 Generators:
		Fitting DropUniqueFeatureGenerator...
	Stage 5 Generators:
		Fitting DropDuplicatesFeatureGenerator...
	Types of features in original data (raw dtype, special dtypes):
		('float', []) : 43 | ['Material=Steel', 'Material=Aluminum', 'Material=Titanium', 'SSB_Include', 'CSB_Include', ...]
	Types of features in processed data (raw dtype, special dtypes):
		('float', [])     : 38 | ['CS Length', 'BB Drop', 'Stack', 'SS E', 'ST Angle', ...]
		('int', ['bool']) :  5 | ['Material=Steel', 'Material=Aluminum', 'Material=Titanium', 'SSB_Include', 'CSB_Include']
	0.2s = Fit runtime
	43 features in original data used to generate 43 features in processed data.
	Train Data (Processed) Memory Usage: 3.44 MB (0.0% of available memory)
Data preprocessing and feature engineering runtime = 0.27s ...
AutoGluon will gauge predictive performance using evaluation metric: 'root_mean_squared_error'
	This metric's sign has been flipped to adhere to being higher_is_better. The metric score 

[1000]	valid_set's rmse: 0.273899
[2000]	valid_set's rmse: 0.270414
[3000]	valid_set's rmse: 0.268822
[4000]	valid_set's rmse: 0.268107
[5000]	valid_set's rmse: 0.26762
[6000]	valid_set's rmse: 0.267409
[7000]	valid_set's rmse: 0.267397
[8000]	valid_set's rmse: 0.26734
[9000]	valid_set's rmse: 0.267309
[10000]	valid_set's rmse: 0.267288


	-0.2673	 = Validation score   (-root_mean_squared_error)
	46.36s	 = Training   runtime
	1.54s	 = Validation runtime
Fitting model: LightGBM ...
	-0.281	 = Validation score   (-root_mean_squared_error)
	3.8s	 = Training   runtime
	0.04s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-0.3268	 = Validation score   (-root_mean_squared_error)
	92.61s	 = Training   runtime
	0.15s	 = Validation runtime
Fitting model: CatBoost ...
	-0.2025	 = Validation score   (-root_mean_squared_error)
	221.19s	 = Training   runtime
	0.01s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-0.2968	 = Validation score   (-root_mean_squared_error)
	19.47s	 = Training   runtime
	0.16s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
	-0.2434	 = Validation score   (-root_mean_squared_error)
	10.35s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: XGBoost ...
	-0.2427	 = Validation score   (-root_mean_squared_error)
	20.01s	 = Training   runtime
	0.1s	 = Validation runtim

Fitting TabularPredictor for label: Sim 3 Bottom Bracket Y Disp. ...


	Stage 5 Generators:
		Fitting DropDuplicatesFeatureGenerator...
	Types of features in original data (raw dtype, special dtypes):
		('float', []) : 44 | ['Material=Steel', 'Material=Aluminum', 'Material=Titanium', 'SSB_Include', 'CSB_Include', ...]
	Types of features in processed data (raw dtype, special dtypes):
		('float', [])     : 39 | ['CS Length', 'BB Drop', 'Stack', 'SS E', 'ST Angle', ...]
		('int', ['bool']) :  5 | ['Material=Steel', 'Material=Aluminum', 'Material=Titanium', 'SSB_Include', 'CSB_Include']
	0.2s = Fit runtime
	44 features in original data used to generate 44 features in processed data.
	Train Data (Processed) Memory Usage: 3.53 MB (0.0% of available memory)
Data preprocessing and feature engineering runtime = 0.23s ...
AutoGluon will gauge predictive performance using evaluation metric: 'root_mean_squared_error'
	This metric's sign has been flipped to adhere to being higher_is_better. The metric score can be multiplied by -1 to get the metric value.
	To change t

Fitting TabularPredictor for label: Sim 3 Bottom Bracket X Rot. ...


	Stage 5 Generators:
		Fitting DropDuplicatesFeatureGenerator...
	Types of features in original data (raw dtype, special dtypes):
		('float', []) : 45 | ['Material=Steel', 'Material=Aluminum', 'Material=Titanium', 'SSB_Include', 'CSB_Include', ...]
	Types of features in processed data (raw dtype, special dtypes):
		('float', [])     : 40 | ['CS Length', 'BB Drop', 'Stack', 'SS E', 'ST Angle', ...]
		('int', ['bool']) :  5 | ['Material=Steel', 'Material=Aluminum', 'Material=Titanium', 'SSB_Include', 'CSB_Include']
	0.2s = Fit runtime
	45 features in original data used to generate 45 features in processed data.
	Train Data (Processed) Memory Usage: 3.62 MB (0.0% of available memory)
Data preprocessing and feature engineering runtime = 0.25s ...
AutoGluon will gauge predictive performance using evaluation metric: 'root_mean_squared_error'
	This metric's sign has been flipped to adhere to being higher_is_better. The metric score can be multiplied by -1 to get the metric value.
	To change t

[1000]	valid_set's rmse: 0.381511


	-0.3814	 = Validation score   (-root_mean_squared_error)
	41.49s	 = Training   runtime
	0.41s	 = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
	-0.321	 = Validation score   (-root_mean_squared_error)
	0.4s	 = Training   runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 458.61s ... Best model: "WeightedEnsemble_L2"
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("AutogluonModels/ag-20231012_051754/Predictor_Sim 3 Bottom Bracket X Rot./")
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels/ag-20231012_051754/Predictor_Sim 1 Safety Factor (Inverted)/"
AutoGluon Version:  0.8.2
Python Version:     3.10.12
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Wed Aug 30 11:19:59 UTC 2023
Disk Space Avail:   45.30 GB / 83.96 GB (54.0%)
Train Data Rows:    11138
Train Data Columns: 46
Label Column: Sim 1 Safety Factor (Inverted)
Preprocessing data ...
AutoGluon infers your

Fitting TabularPredictor for label: Sim 1 Safety Factor (Inverted) ...


	Types of features in original data (raw dtype, special dtypes):
		('float', []) : 46 | ['Material=Steel', 'Material=Aluminum', 'Material=Titanium', 'SSB_Include', 'CSB_Include', ...]
	Types of features in processed data (raw dtype, special dtypes):
		('float', [])     : 41 | ['CS Length', 'BB Drop', 'Stack', 'SS E', 'ST Angle', ...]
		('int', ['bool']) :  5 | ['Material=Steel', 'Material=Aluminum', 'Material=Titanium', 'SSB_Include', 'CSB_Include']
	0.2s = Fit runtime
	46 features in original data used to generate 46 features in processed data.
	Train Data (Processed) Memory Usage: 3.71 MB (0.0% of available memory)
Data preprocessing and feature engineering runtime = 0.21s ...
AutoGluon will gauge predictive performance using evaluation metric: 'root_mean_squared_error'
	This metric's sign has been flipped to adhere to being higher_is_better. The metric score can be multiplied by -1 to get the metric value.
	To change this, specify the eval_metric parameter of Predictor()
Automatical

[1000]	valid_set's rmse: 2.55636
[2000]	valid_set's rmse: 2.54448
[3000]	valid_set's rmse: 2.54223
[4000]	valid_set's rmse: 2.54138
[5000]	valid_set's rmse: 2.54082
[6000]	valid_set's rmse: 2.54058
[7000]	valid_set's rmse: 2.54046
[8000]	valid_set's rmse: 2.54039
[9000]	valid_set's rmse: 2.54029
[10000]	valid_set's rmse: 2.54031


	-2.5403	 = Validation score   (-root_mean_squared_error)
	43.58s	 = Training   runtime
	1.21s	 = Validation runtime
Fitting model: LightGBM ...


[1000]	valid_set's rmse: 2.53594


	-2.5352	 = Validation score   (-root_mean_squared_error)
	10.51s	 = Training   runtime
	0.24s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-2.575	 = Validation score   (-root_mean_squared_error)
	147.58s	 = Training   runtime
	0.18s	 = Validation runtime
Fitting model: CatBoost ...
	-2.5576	 = Validation score   (-root_mean_squared_error)
	19.25s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-2.5704	 = Validation score   (-root_mean_squared_error)
	21.36s	 = Training   runtime
	0.31s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
	-2.1745	 = Validation score   (-root_mean_squared_error)
	9.91s	 = Training   runtime
	0.02s	 = Validation runtime
Fitting model: XGBoost ...
	-2.5669	 = Validation score   (-root_mean_squared_error)
	23.97s	 = Training   runtime
	0.3s	 = Validation runtime
Fitting model: NeuralNetTorch ...
	-2.4459	 = Validation score   (-root_mean_squared_error)
	28.96s	 = Training   runtime
	0.07s	 = Validation 

Fitting TabularPredictor for label: Sim 3 Safety Factor (Inverted) ...


	Types of features in original data (raw dtype, special dtypes):
		('float', []) : 47 | ['Material=Steel', 'Material=Aluminum', 'Material=Titanium', 'SSB_Include', 'CSB_Include', ...]
	Types of features in processed data (raw dtype, special dtypes):
		('float', [])     : 42 | ['CS Length', 'BB Drop', 'Stack', 'SS E', 'ST Angle', ...]
		('int', ['bool']) :  5 | ['Material=Steel', 'Material=Aluminum', 'Material=Titanium', 'SSB_Include', 'CSB_Include']
	0.2s = Fit runtime
	47 features in original data used to generate 47 features in processed data.
	Train Data (Processed) Memory Usage: 3.8 MB (0.0% of available memory)
Data preprocessing and feature engineering runtime = 0.2s ...
AutoGluon will gauge predictive performance using evaluation metric: 'root_mean_squared_error'
	This metric's sign has been flipped to adhere to being higher_is_better. The metric score can be multiplied by -1 to get the metric value.
	To change this, specify the eval_metric parameter of Predictor()
Automatically

[1000]	valid_set's rmse: 2.5472
[2000]	valid_set's rmse: 2.53887


	-2.538	 = Validation score   (-root_mean_squared_error)
	10.48s	 = Training   runtime
	0.32s	 = Validation runtime
Fitting model: LightGBM ...
	-2.499	 = Validation score   (-root_mean_squared_error)
	5.77s	 = Training   runtime
	0.06s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-2.4316	 = Validation score   (-root_mean_squared_error)
	121.13s	 = Training   runtime
	0.16s	 = Validation runtime
Fitting model: CatBoost ...
	-2.5459	 = Validation score   (-root_mean_squared_error)
	15.58s	 = Training   runtime
	0.01s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-2.3696	 = Validation score   (-root_mean_squared_error)
	19.94s	 = Training   runtime
	0.27s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
No improvement since epoch 3: early stopping
	-0.6587	 = Validation score   (-root_mean_squared_error)
	11.46s	 = Training   runtime
	0.02s	 = Validation runtime
Fitting model: XGBoost ...
	-2.4659	 = Validation score   (-root_mean_squared_error)
	7.8s	 = Tr

Fitting TabularPredictor for label: Model Mass ...


	Stage 4 Generators:
		Fitting DropUniqueFeatureGenerator...
	Stage 5 Generators:
		Fitting DropDuplicatesFeatureGenerator...
	Types of features in original data (raw dtype, special dtypes):
		('float', []) : 48 | ['Material=Steel', 'Material=Aluminum', 'Material=Titanium', 'SSB_Include', 'CSB_Include', ...]
	Types of features in processed data (raw dtype, special dtypes):
		('float', [])     : 43 | ['CS Length', 'BB Drop', 'Stack', 'SS E', 'ST Angle', ...]
		('int', ['bool']) :  5 | ['Material=Steel', 'Material=Aluminum', 'Material=Titanium', 'SSB_Include', 'CSB_Include']
	0.2s = Fit runtime
	48 features in original data used to generate 48 features in processed data.
	Train Data (Processed) Memory Usage: 3.89 MB (0.0% of available memory)
Data preprocessing and feature engineering runtime = 0.27s ...
AutoGluon will gauge predictive performance using evaluation metric: 'root_mean_squared_error'
	This metric's sign has been flipped to adhere to being higher_is_better. The metric score 

[1000]	valid_set's rmse: 0.130339
[2000]	valid_set's rmse: 0.121286
[3000]	valid_set's rmse: 0.118726
[4000]	valid_set's rmse: 0.117482
[5000]	valid_set's rmse: 0.117009
[6000]	valid_set's rmse: 0.116717
[7000]	valid_set's rmse: 0.116537
[8000]	valid_set's rmse: 0.11647
[9000]	valid_set's rmse: 0.116422
[10000]	valid_set's rmse: 0.1164


	-0.1164	 = Validation score   (-root_mean_squared_error)
	45.2s	 = Training   runtime
	1.22s	 = Validation runtime
Fitting model: LightGBM ...


[1000]	valid_set's rmse: 0.137374
[2000]	valid_set's rmse: 0.13594
[3000]	valid_set's rmse: 0.135669
[4000]	valid_set's rmse: 0.135432
[5000]	valid_set's rmse: 0.135351
[6000]	valid_set's rmse: 0.135316
[7000]	valid_set's rmse: 0.135304
[8000]	valid_set's rmse: 0.135294
[9000]	valid_set's rmse: 0.135288
[10000]	valid_set's rmse: 0.135284


	-0.1353	 = Validation score   (-root_mean_squared_error)
	64.87s	 = Training   runtime
	1.26s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-0.2241	 = Validation score   (-root_mean_squared_error)
	94.44s	 = Training   runtime
	0.15s	 = Validation runtime
Fitting model: CatBoost ...
	-0.1099	 = Validation score   (-root_mean_squared_error)
	243.06s	 = Training   runtime
	0.01s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-0.2043	 = Validation score   (-root_mean_squared_error)
	19.78s	 = Training   runtime
	0.16s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
	-0.1253	 = Validation score   (-root_mean_squared_error)
	12.47s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: XGBoost ...
	-0.1416	 = Validation score   (-root_mean_squared_error)
	29.69s	 = Training   runtime
	0.16s	 = Validation runtime
Fitting model: NeuralNetTorch ...
	-0.1491	 = Validation score   (-root_mean_squared_error)
	23.74s	 = Training   runtime
	0.06s	 = Validat

[1000]	valid_set's rmse: 0.144699
[2000]	valid_set's rmse: 0.144461
[3000]	valid_set's rmse: 0.144442
[4000]	valid_set's rmse: 0.144439
[5000]	valid_set's rmse: 0.144439
[6000]	valid_set's rmse: 0.144438
[7000]	valid_set's rmse: 0.144438
[8000]	valid_set's rmse: 0.144438


	-0.1444	 = Validation score   (-root_mean_squared_error)
	185.42s	 = Training   runtime
	1.48s	 = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
	-0.1018	 = Validation score   (-root_mean_squared_error)
	0.69s	 = Training   runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 735.68s ... Best model: "WeightedEnsemble_L2"
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("AutogluonModels/ag-20231012_051754/Predictor_Model Mass/")


MultilabelPredictor saved to disk. Load with: MultilabelPredictor.load('AutogluonModels/ag-20231012_051754/')


In [10]:
# Reload the multi-label predictor that the training run saved to disk and
# score it on the held-out split.
# NOTE(review): the directory name embeds the timestamp of one specific training
# run; a fresh fit will presumably write to a different folder -- confirm before re-running.
my_predictor = MultilabelPredictor.load("AutogluonModels/ag-20231012_051754/")
predictions = my_predictor.predict(x_test)

# Evaluate the three sklearn regression metrics in a fixed order, each called as
# metric(y_true, y_pred). Assumes x_test / y_test come from an earlier
# train/test split cell -- TODO confirm.
r2, mse, mae = [
    metric(y_test, predictions)
    for metric in (r2_score, mean_squared_error, mean_absolute_error)
]

# Bare tuple as the last expression so the notebook displays all three scores.
r2, mse, mae

(0.6930220830194771, 0.1826461079600396, 0.12257666253981998)

In [11]:
import shutil

# Pack everything under /content/AutogluonModels into "trained-model.zip".
# make_archive returns the path of the archive it wrote, which the notebook
# shows as this cell's output.
shutil.make_archive(
    base_name="trained-model",
    format="zip",
    root_dir="/content/AutogluonModels",
)


'/content/trained-model.zip'