In [1]:
!pip install autogluon.tabular[all]==0.8.2


Collecting autogluon.tabular[all]==0.8.2
  Downloading autogluon.tabular-0.8.2-py3-none-any.whl (285 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m285.7/285.7 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
Collecting autogluon.core==0.8.2 (from autogluon.tabular[all]==0.8.2)
  Downloading autogluon.core-0.8.2-py3-none-any.whl (224 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.0/224.0 kB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting autogluon.features==0.8.2 (from autogluon.tabular[all]==0.8.2)
  Downloading autogluon.features-0.8.2-py3-none-any.whl (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.1/62.1 kB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting lightgbm<3.4,>=3.3 (from autogluon.tabular[all]==0.8.2)
  Downloading lightgbm-3.3.5-py3-none-manylinux1_x86_64.whl (2.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m11.9 MB/s[0m eta [36m0:00:00

In [1]:
# Colab-only step: mount Google Drive at /content/drive so files stored there can be read by path.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split

In [3]:

from autogluon.tabular import TabularDataset, TabularPredictor
from autogluon.common.utils.utils import setup_outputdir
from autogluon.core.utils.loaders import load_pkl
from autogluon.core.utils.savers import save_pkl
import os.path

"""
@author: Lyle
"""

class MultilabelPredictor:
    """ Tabular Predictor for predicting multiple columns in table.
        Creates multiple TabularPredictor objects which you can also use individually.
        You can access the TabularPredictor for a particular label via: `multilabel_predictor.get_predictor(label_i)`

        Parameters
        ----------
        labels : List[str]
            The ith element of this list is the column (i.e. `label`) predicted by the ith TabularPredictor
            stored in this object. Any iterable of column names is accepted and stored as a list.
        path : str, default = None
            Path to directory where models and intermediate outputs should be saved.
            If unspecified, a time-stamped folder called "AutogluonModels/ag-[TIMESTAMP]" will be created in the
            working directory to store all models.
            Note: To call `fit()` twice and save all results of each fit, you must specify different `path`
            locations or don't specify `path` at all. Otherwise files from first `fit()` will be overwritten by
            second `fit()`.
            Caution: when predicting many labels, this directory may grow large as it needs to store many
            TabularPredictors.
        problem_types : List[str], default = None
            The ith element is the `problem_type` for the ith TabularPredictor stored in this object.
        eval_metrics : List[str], default = None
            The ith element is the `eval_metric` for the ith TabularPredictor stored in this object.
        consider_labels_correlation : bool, default = True
            Whether the predictions of multiple labels should account for label correlations or predict each label
            independently of the others. If True, the ordering of `labels` may affect resulting accuracy as each
            label is predicted conditional on the previous labels appearing earlier in this list (i.e. in an
            auto-regressive fashion). Set to False if during inference you may want to individually use just the
            ith TabularPredictor without predicting all the other labels.
        kwargs :
            Arguments passed into the initialization of each TabularPredictor.
    """

    multi_predictor_file = 'multilabel_predictor.pkl'

    def __init__(self, labels, path=None, problem_types=None, eval_metrics=None, consider_labels_correlation=True,
                 **kwargs):
        # Directory this object was loaded from; stays None until `load()` sets it.
        self.model_root = None
        # Normalise to a plain list so a pandas Index (or any other iterable) behaves like the documented List[str].
        labels = list(labels)
        if len(labels) < 2:
            raise ValueError(
                "MultilabelPredictor is only intended for predicting MULTIPLE labels (columns), use TabularPredictor "
                "for predicting one label (column).")
        if (problem_types is not None) and (len(problem_types) != len(labels)):
            raise ValueError("If provided, `problem_types` must have same length as `labels`")
        if (eval_metrics is not None) and (len(eval_metrics) != len(labels)):
            raise ValueError("If provided, `eval_metrics` must have same length as `labels`")
        self.path = setup_outputdir(path, warn_if_exist=False)
        self.labels = labels
        self.consider_labels_correlation = consider_labels_correlation
        self.predictors = {}  # key = label, value = TabularPredictor or str path to the TabularPredictor for this label
        if eval_metrics is None:
            self.eval_metrics = {}
        else:
            self.eval_metrics = dict(zip(labels, eval_metrics))
        for i, label in enumerate(labels):
            # os.path.join gives the same result whether or not `self.path` ends with a separator.
            path_i = os.path.join(self.path, f"Predictor_{label}")
            problem_type = problem_types[i] if problem_types is not None else None
            eval_metric = eval_metrics[i] if eval_metrics is not None else None
            self.predictors[label] = TabularPredictor(label=label, problem_type=problem_type, eval_metric=eval_metric,
                                                      path=path_i, **kwargs)

    def fit(self, train_data, tuning_data=None, **kwargs):
        """ Fits a separate TabularPredictor to predict each of the labels.

            Parameters
            ----------
            train_data, tuning_data : str or autogluon.tabular.TabularDataset or pd.DataFrame
                See documentation for `TabularPredictor.fit()`.
            kwargs :
                Arguments passed into the `fit()` call for each TabularPredictor.
        """
        if isinstance(train_data, str):
            train_data = TabularDataset(train_data)
        if isinstance(tuning_data, str):
            tuning_data = TabularDataset(tuning_data)
        # Keep untouched copies: every label gets its own column subset derived from these.
        train_data_og = train_data.copy()
        tuning_data_og = tuning_data.copy() if tuning_data is not None else None
        # Only record the metrics chosen by the predictors when the caller did not supply any.
        save_metrics = len(self.eval_metrics) == 0
        for i, label in enumerate(self.labels):
            predictor = self.get_predictor(label)
            if not self.consider_labels_correlation:
                # Independent mode: hide every other label from this predictor.
                labels_to_drop = [l for l in self.labels if l != label]
            else:
                # Auto-regressive mode: earlier labels stay in as features, later ones are hidden.
                labels_to_drop = list(self.labels[i + 1:])
            train_data = train_data_og.drop(labels_to_drop, axis=1)
            if tuning_data_og is not None:
                tuning_data = tuning_data_og.drop(labels_to_drop, axis=1)
            print(f"Fitting TabularPredictor for label: {label} ...")
            predictor.fit(train_data=train_data, tuning_data=tuning_data, **kwargs)
            # Keep only the on-disk location; `get_predictor` reloads the model when it is needed.
            self.predictors[label] = predictor.path
            if save_metrics:
                self.eval_metrics[label] = predictor.eval_metric
        self.save()

    def predict(self, data, **kwargs):
        """ Returns DataFrame with label columns containing predictions for each label.

            Parameters
            ----------
            data : str or autogluon.tabular.TabularDataset or pd.DataFrame
                Data to make predictions for. If label columns are present in this data, they will be ignored.
                See documentation for `TabularPredictor.predict()`.
            kwargs :
                Arguments passed into the predict() call for each TabularPredictor.
        """
        return self._predict(data, as_proba=False, **kwargs)

    def predict_proba(self, data, **kwargs):
        """ Returns dict where each key is a label and the corresponding value is the `predict_proba()` output for just that label.

            Parameters
            ----------
            data : str or autogluon.tabular.TabularDataset or pd.DataFrame
                Data to make predictions for. See documentation for `TabularPredictor.predict()` and `TabularPredictor.predict_proba()`.
            kwargs :
                Arguments passed into the `predict_proba()` call for each TabularPredictor (also passed into a `predict()` call).
        """
        return self._predict(data, as_proba=True, **kwargs)

    def evaluate(self, data, **kwargs):
        """ Returns dict where each key is a label and the corresponding value is the `evaluate()` output for just that label.

            Parameters
            ----------
            data : str or autogluon.tabular.TabularDataset or pd.DataFrame
                Data to evaluate predictions of all labels for, must contain all labels as columns. See documentation for `TabularPredictor.evaluate()`.
            kwargs :
                Arguments passed into the `evaluate()` call for each TabularPredictor (also passed into the `predict()` call).
        """
        data = self._get_data(data)
        eval_dict = {}
        for label in self.labels:
            print(f"Evaluating TabularPredictor for label: {label} ...")
            predictor = self.get_predictor(label)
            eval_dict[label] = predictor.evaluate(data, **kwargs)
            if self.consider_labels_correlation:
                # Later predictors are evaluated on this label's prediction rather than its true value.
                data[label] = predictor.predict(data, **kwargs)
        return eval_dict

    def save(self):
        """ Save MultilabelPredictor to disk. In-memory TabularPredictors are replaced by their paths first. """
        for label in self.labels:
            if not isinstance(self.predictors[label], str):
                self.predictors[label] = self.predictors[label].path
        save_pkl.save(path=os.path.join(self.path, self.multi_predictor_file), object=self)
        print(f"MultilabelPredictor saved to disk. Load with: MultilabelPredictor.load('{self.path}')")

    @classmethod
    def load(cls, path):
        """ Load MultilabelPredictor from disk `path` previously specified when creating this MultilabelPredictor.

            `path` is remembered as `model_root`, so the per-label predictors are looked up inside it even when
            the whole directory was moved after training.
        """
        # NOTE(review): load_pkl presumably unpickles the file; only load predictors from trusted locations.
        predictor_instance = load_pkl.load(path=os.path.join(path, cls.multi_predictor_file))
        predictor_instance.model_root = path
        return predictor_instance

    def get_predictor(self, label):
        """ Returns TabularPredictor which is used to predict this label. """
        predictor = self.predictors[label]
        if not isinstance(predictor, str):
            return predictor
        model_root = getattr(self, "model_root", None)
        if model_root is None:
            # Not restored through `load()` (e.g. right after `fit()`): the stored path is still valid as-is.
            return TabularPredictor.load(path=predictor)
        # Restored through `load()`: only the predictor's own folder name is kept and resolved inside model_root.
        # normpath drops a trailing separator, so this works with or without one.
        predictor_dir = os.path.basename(os.path.normpath(predictor))
        return TabularPredictor.load(path=os.path.join(model_root, predictor_dir))

    def _get_data(self, data):
        """ Returns `data` loaded as a TabularDataset (if a path) or a copy that is safe to modify. """
        if isinstance(data, str):
            return TabularDataset(data)
        return data.copy()

    def _predict(self, data, as_proba=False, **kwargs):
        """ Shared implementation of `predict()` (as_proba=False) and `predict_proba()` (as_proba=True). """
        data = self._get_data(data)
        predproba_dict = {}
        for label in self.labels:
            predictor = self.get_predictor(label)
            if as_proba:
                predproba_dict[label] = predictor.predict_proba(data, as_multiclass=True, **kwargs)
            # Write the prediction into the working copy so it is present as a column for the following predictors.
            data[label] = predictor.predict(data, **kwargs)
        if as_proba:
            return predproba_dict
        return data[self.labels]


In [4]:
# -*- coding: utf-8 -*-
"""
Created on Sun Jul 10 12:05:43 2022

@author: Lyle
"""

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

ALL_STRUCTURAL_DATASET = "/content/drive/MyDrive/all_structural_data_aug.csv"


def one_hot_encode_material(data):
    """Replace the "Material" column with three leading one-hot indicator columns.

    The indicator columns are always "Material=Steel", "Material=Aluminum" and "Material=Titanium",
    in that order, whether or not each material occurs in `data`. A value outside these three gets
    no indicator set. The input frame is left unmodified.

    Parameters
    ----------
    data : pd.DataFrame
        Frame containing a "Material" column.

    Returns
    -------
    pd.DataFrame
        New frame: the three indicator columns followed by the remaining columns of `data`.
    """
    # An explicit categorical dtype fixes both the set and the order of the dummy columns. Assigning a
    # Categorical through `.loc[:, ...]` does not reliably change the column dtype across pandas versions.
    material_dtype = pd.CategoricalDtype(categories=["Steel", "Aluminum", "Titanium"])
    materials = data["Material"].astype(material_dtype)
    mats_oh = pd.get_dummies(materials, prefix="Material=", prefix_sep="")
    return pd.concat([mats_oh, data.drop(columns=["Material"])], axis=1)


def load_augmented_framed_dataset(dataset_path=None):
    """Load the structural dataset and return standardized inputs and targets.

    Parameters
    ----------
    dataset_path : str, default = None
        CSV file to read (its first column is used as the index). Falls back to
        `ALL_STRUCTURAL_DATASET` when not given.

    Returns
    -------
    x : pd.DataFrame
        Standardized inputs: all but the last 11 columns, with "Material" one-hot encoded.
    y : pd.DataFrame
        Standardized targets: the 10 columns preceding the final column, with the two
        safety-factor columns replaced by their reciprocals and renamed "... (Inverted)".
    x_scaler, y_scaler
        The scalers fitted on `x` and `y` respectively, as returned by `scale()`.
    """
    if dataset_path is None:
        dataset_path = ALL_STRUCTURAL_DATASET
    reg_data = pd.read_csv(dataset_path, index_col=0)

    # NOTE(review): the column split is purely positional (-11 / -1); confirm it matches the CSV layout.
    x = one_hot_encode_material(reg_data.iloc[:, :-11])
    x, x_scaler = scale(x)

    # Explicit copy: the columns below are overwritten, which must not touch (or warn about) reg_data.
    y = reg_data.iloc[:, -11:-1].copy()

    inverted_cols = ['Sim 1 Safety Factor', 'Sim 3 Safety Factor']
    for col in inverted_cols:
        y[col] = 1 / y[col]
    y = y.rename(columns={col: col + " (Inverted)" for col in inverted_cols})

    # THIS MODEL HAS BEEN TRAINED ON SIGNED DISPLACEMENT VALUES INSTEAD OF MAGNITUDES:
    # no absolute value is taken of any target column.

    # NOTE(review): both scalers are fitted on every row, i.e. before any train/test split done by the caller.
    y, y_scaler = scale(y)

    return x, y, x_scaler, y_scaler


def scale(v):
    """Standardize the columns of `v` with a newly fitted StandardScaler.

    Returns a tuple of (DataFrame of scaled values carrying `v`'s column labels and index,
    the fitted scaler).
    """
    fitted_scaler = StandardScaler()
    fitted_scaler.fit(v)
    standardized = pd.DataFrame(
        fitted_scaler.transform(v),
        index=v.index,
        columns=v.columns,
    )
    return standardized, fitted_scaler

In [5]:
# Load the standardized inputs/targets together with the scalers that were fitted on them.
x_scaled, y_scaled, x_scaler, y_scaler = load_augmented_framed_dataset()
# Hold out a test split; the fixed random_state keeps the split repeatable across runs.
# NOTE(review): the scalers above were fitted on all rows before this split, so test rows
# contribute to the scaling statistics — confirm this is acceptable for the reported metrics.
x_train, x_test, y_train, y_test = train_test_split(x_scaled, y_scaled, random_state=2023)


  data.loc[:, "Material"] = pd.Categorical(data["Material"], categories=["Steel", "Aluminum", "Titanium"])


In [6]:
# Sanity check: inputs and targets must have the same number of rows within each split.
print(len(x_test), len(y_test))
len(x_train), len(y_train)

3713 3713


(11138, 11138)

In [7]:
# Join inputs and targets column-wise into the single table that is passed to fit() as train_data.
full_training_set = pd.concat([x_train, y_train], axis=1)
len(full_training_set)

11138

In [8]:
# Train one TabularPredictor per target column. consider_labels_correlation is left at its default
# (True), so each predictor also receives the targets that come earlier in the column order as features.
# No `path` is given, so models go to a time-stamped folder under AutogluonModels/ in the working directory.
my_predictor = MultilabelPredictor(labels=y_scaled.columns)
my_predictor.fit(
    train_data=full_training_set, presets='optimize_for_deployment', num_gpus=1
)

Presets specified: ['optimize_for_deployment']
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels/ag-20231016_092811/Predictor_Sim 1 Dropout X Disp./"
AutoGluon Version:  0.8.2
Python Version:     3.10.12
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Wed Aug 30 11:19:59 UTC 2023
Disk Space Avail:   48.58 GB / 83.96 GB (57.9%)
Train Data Rows:    11138
Train Data Columns: 39
Label Column: Sim 1 Dropout X Disp.
Preprocessing data ...
AutoGluon infers your prediction problem is: 'regression' (because dtype of label-column == float and many unique label-values observed).
	Label info (max, min, mean, stddev): (12.31357753874557, -5.036857510156917, -0.00664, 0.99526)
	If 'regression' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Using Feature Generators to preprocess the data ...
Fitting Au

Fitting TabularPredictor for label: Sim 1 Dropout X Disp. ...


Data preprocessing and feature engineering runtime = 0.18s ...
AutoGluon will gauge predictive performance using evaluation metric: 'root_mean_squared_error'
	This metric's sign has been flipped to adhere to being higher_is_better. The metric score can be multiplied by -1 to get the metric value.
	To change this, specify the eval_metric parameter of Predictor()
Automatically generating train/validation split with holdout_frac=0.1, Train Rows: 10024, Val Rows: 1114
User-specified model hyperparameters to be fit:
{
	'NN_TORCH': {},
	'GBM': [{'extra_trees': True, 'ag_args': {'name_suffix': 'XT'}}, {}, 'GBMLarge'],
	'CAT': {},
	'XGB': {},
	'FASTAI': {},
	'RF': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
	'XT': [{'crit

Fitting TabularPredictor for label: Sim 1 Dropout Y Disp. ...


	Types of features in original data (raw dtype, special dtypes):
		('float', []) : 40 | ['Material=Steel', 'Material=Aluminum', 'Material=Titanium', 'SSB_Include', 'CSB_Include', ...]
	Types of features in processed data (raw dtype, special dtypes):
		('float', [])     : 35 | ['CS Length', 'BB Drop', 'Stack', 'SS E', 'ST Angle', ...]
		('int', ['bool']) :  5 | ['Material=Steel', 'Material=Aluminum', 'Material=Titanium', 'SSB_Include', 'CSB_Include']
	0.2s = Fit runtime
	40 features in original data used to generate 40 features in processed data.
	Train Data (Processed) Memory Usage: 3.17 MB (0.0% of available memory)
Data preprocessing and feature engineering runtime = 0.2s ...
AutoGluon will gauge predictive performance using evaluation metric: 'root_mean_squared_error'
	This metric's sign has been flipped to adhere to being higher_is_better. The metric score can be multiplied by -1 to get the metric value.
	To change this, specify the eval_metric parameter of Predictor()
Automaticall

[1000]	valid_set's rmse: 0.410868
[2000]	valid_set's rmse: 0.407763


	-0.4077	 = Validation score   (-root_mean_squared_error)
	10.99s	 = Training   runtime
	0.24s	 = Validation runtime
Fitting model: LightGBM ...
	Training LightGBM with GPU, note that this may negatively impact model quality compared to CPU training.


[1000]	valid_set's rmse: 0.323699
[2000]	valid_set's rmse: 0.312753
[3000]	valid_set's rmse: 0.31383


	-0.3125	 = Validation score   (-root_mean_squared_error)
	18.54s	 = Training   runtime
	0.5s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-0.3175	 = Validation score   (-root_mean_squared_error)
	77.57s	 = Training   runtime
	0.26s	 = Validation runtime
Fitting model: CatBoost ...
	Training CatBoost with GPU, note that this may negatively impact model quality compared to CPU training.
	-0.3315	 = Validation score   (-root_mean_squared_error)
	11.52s	 = Training   runtime
	0.01s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-0.3468	 = Validation score   (-root_mean_squared_error)
	16.85s	 = Training   runtime
	0.15s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
No improvement since epoch 9: early stopping
	-0.2661	 = Validation score   (-root_mean_squared_error)
	11.84s	 = Training   runtime
	0.02s	 = Validation runtime
Fitting model: XGBoost ...
	-0.338	 = Validation score   (-root_mean_squared_error)
	1.98s	 = Training   runtime
	0.02s	 = Validation 

Fitting TabularPredictor for label: Sim 1 Bottom Bracket X Disp. ...


	Train Data (Processed) Memory Usage: 3.26 MB (0.0% of available memory)
Data preprocessing and feature engineering runtime = 0.18s ...
AutoGluon will gauge predictive performance using evaluation metric: 'root_mean_squared_error'
	This metric's sign has been flipped to adhere to being higher_is_better. The metric score can be multiplied by -1 to get the metric value.
	To change this, specify the eval_metric parameter of Predictor()
Automatically generating train/validation split with holdout_frac=0.1, Train Rows: 10024, Val Rows: 1114
User-specified model hyperparameters to be fit:
{
	'NN_TORCH': {},
	'GBM': [{'extra_trees': True, 'ag_args': {'name_suffix': 'XT'}}, {}, 'GBMLarge'],
	'CAT': {},
	'XGB': {},
	'FASTAI': {},
	'RF': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffi

[1000]	valid_set's rmse: 0.17849
[2000]	valid_set's rmse: 0.176705
[3000]	valid_set's rmse: 0.176325
[4000]	valid_set's rmse: 0.176285
[5000]	valid_set's rmse: 0.176222
[6000]	valid_set's rmse: 0.17615
[7000]	valid_set's rmse: 0.176122
[8000]	valid_set's rmse: 0.176135
[9000]	valid_set's rmse: 0.176134


	-0.1761	 = Validation score   (-root_mean_squared_error)
	36.12s	 = Training   runtime
	0.89s	 = Validation runtime
Fitting model: LightGBM ...
	Training LightGBM with GPU, note that this may negatively impact model quality compared to CPU training.
	-0.1156	 = Validation score   (-root_mean_squared_error)
	2.6s	 = Training   runtime
	0.01s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-0.065	 = Validation score   (-root_mean_squared_error)
	69.74s	 = Training   runtime
	0.15s	 = Validation runtime
Fitting model: CatBoost ...
	Training CatBoost with GPU, note that this may negatively impact model quality compared to CPU training.
	-0.1609	 = Validation score   (-root_mean_squared_error)
	4.2s	 = Training   runtime
	0.01s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-0.0621	 = Validation score   (-root_mean_squared_error)
	16.78s	 = Training   runtime
	0.15s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
	-0.0539	 = Validation score   (-root_mean_square

Fitting TabularPredictor for label: Sim 1 Bottom Bracket Y Disp. ...


Data preprocessing and feature engineering runtime = 0.17s ...
AutoGluon will gauge predictive performance using evaluation metric: 'root_mean_squared_error'
	This metric's sign has been flipped to adhere to being higher_is_better. The metric score can be multiplied by -1 to get the metric value.
	To change this, specify the eval_metric parameter of Predictor()
Automatically generating train/validation split with holdout_frac=0.1, Train Rows: 10024, Val Rows: 1114
User-specified model hyperparameters to be fit:
{
	'NN_TORCH': {},
	'GBM': [{'extra_trees': True, 'ag_args': {'name_suffix': 'XT'}}, {}, 'GBMLarge'],
	'CAT': {},
	'XGB': {},
	'FASTAI': {},
	'RF': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
	'XT': [{'crit

[1000]	valid_set's rmse: 0.35138
[2000]	valid_set's rmse: 0.347272
[3000]	valid_set's rmse: 0.346965
[4000]	valid_set's rmse: 0.346849
[5000]	valid_set's rmse: 0.346742
[6000]	valid_set's rmse: 0.34666
[7000]	valid_set's rmse: 0.346643
[8000]	valid_set's rmse: 0.346635
[9000]	valid_set's rmse: 0.346645


	-0.3466	 = Validation score   (-root_mean_squared_error)
	36.36s	 = Training   runtime
	1.04s	 = Validation runtime
Fitting model: LightGBM ...
	Training LightGBM with GPU, note that this may negatively impact model quality compared to CPU training.
	-0.2898	 = Validation score   (-root_mean_squared_error)
	4.01s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-0.2351	 = Validation score   (-root_mean_squared_error)
	74.91s	 = Training   runtime
	0.15s	 = Validation runtime
Fitting model: CatBoost ...
	Training CatBoost with GPU, note that this may negatively impact model quality compared to CPU training.
	-0.3046	 = Validation score   (-root_mean_squared_error)
	6.69s	 = Training   runtime
	0.01s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-0.2471	 = Validation score   (-root_mean_squared_error)
	17.23s	 = Training   runtime
	0.16s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
	-0.1068	 = Validation score   (-root_mean_squ

[1000]	valid_set's rmse: 0.269673


	-0.2695	 = Validation score   (-root_mean_squared_error)
	20.25s	 = Training   runtime
	0.25s	 = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
	-0.0878	 = Validation score   (-root_mean_squared_error)
	0.51s	 = Training   runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 199.22s ... Best model: "WeightedEnsemble_L2"
Deleting model KNeighborsUnif. All files under AutogluonModels/ag-20231016_092811/Predictor_Sim 1 Bottom Bracket Y Disp./models/KNeighborsUnif/ will be removed.
Deleting model KNeighborsDist. All files under AutogluonModels/ag-20231016_092811/Predictor_Sim 1 Bottom Bracket Y Disp./models/KNeighborsDist/ will be removed.
Deleting model LightGBMXT. All files under AutogluonModels/ag-20231016_092811/Predictor_Sim 1 Bottom Bracket Y Disp./models/LightGBMXT/ will be removed.
Deleting model LightGBM. All files under AutogluonModels/ag-20231016_092811/Predictor_Sim 1 Bottom Bracket Y Disp./models/LightGBM/ will be removed.
Deleting model

Fitting TabularPredictor for label: Sim 2 Bottom Bracket Z Disp. ...


	Stage 5 Generators:
		Fitting DropDuplicatesFeatureGenerator...
	Types of features in original data (raw dtype, special dtypes):
		('float', []) : 43 | ['Material=Steel', 'Material=Aluminum', 'Material=Titanium', 'SSB_Include', 'CSB_Include', ...]
	Types of features in processed data (raw dtype, special dtypes):
		('float', [])     : 38 | ['CS Length', 'BB Drop', 'Stack', 'SS E', 'ST Angle', ...]
		('int', ['bool']) :  5 | ['Material=Steel', 'Material=Aluminum', 'Material=Titanium', 'SSB_Include', 'CSB_Include']
	0.2s = Fit runtime
	43 features in original data used to generate 43 features in processed data.
	Train Data (Processed) Memory Usage: 3.44 MB (0.0% of available memory)
Data preprocessing and feature engineering runtime = 0.24s ...
AutoGluon will gauge predictive performance using evaluation metric: 'root_mean_squared_error'
	This metric's sign has been flipped to adhere to being higher_is_better. The metric score can be multiplied by -1 to get the metric value.
	To change t

[1000]	valid_set's rmse: 0.273899
[2000]	valid_set's rmse: 0.270414
[3000]	valid_set's rmse: 0.268822
[4000]	valid_set's rmse: 0.268107
[5000]	valid_set's rmse: 0.26762
[6000]	valid_set's rmse: 0.267409
[7000]	valid_set's rmse: 0.267397
[8000]	valid_set's rmse: 0.26734
[9000]	valid_set's rmse: 0.267309
[10000]	valid_set's rmse: 0.267288


	-0.2673	 = Validation score   (-root_mean_squared_error)
	40.04s	 = Training   runtime
	1.26s	 = Validation runtime
Fitting model: LightGBM ...
	Training LightGBM with GPU, note that this may negatively impact model quality compared to CPU training.
	-0.281	 = Validation score   (-root_mean_squared_error)
	3.66s	 = Training   runtime
	0.04s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-0.3268	 = Validation score   (-root_mean_squared_error)
	87.64s	 = Training   runtime
	0.15s	 = Validation runtime
Fitting model: CatBoost ...
	Training CatBoost with GPU, note that this may negatively impact model quality compared to CPU training.
	-0.2808	 = Validation score   (-root_mean_squared_error)
	10.2s	 = Training   runtime
	0.01s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-0.2968	 = Validation score   (-root_mean_squared_error)
	15.81s	 = Training   runtime
	0.28s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
	-0.2559	 = Validation score   (-root_mean_squa

Fitting TabularPredictor for label: Sim 3 Bottom Bracket Y Disp. ...


Data preprocessing and feature engineering runtime = 0.17s ...
AutoGluon will gauge predictive performance using evaluation metric: 'root_mean_squared_error'
	This metric's sign has been flipped to adhere to being higher_is_better. The metric score can be multiplied by -1 to get the metric value.
	To change this, specify the eval_metric parameter of Predictor()
Automatically generating train/validation split with holdout_frac=0.1, Train Rows: 10024, Val Rows: 1114
User-specified model hyperparameters to be fit:
{
	'NN_TORCH': {},
	'GBM': [{'extra_trees': True, 'ag_args': {'name_suffix': 'XT'}}, {}, 'GBMLarge'],
	'CAT': {},
	'XGB': {},
	'FASTAI': {},
	'RF': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
	'XT': [{'crit

Fitting TabularPredictor for label: Sim 3 Bottom Bracket X Rot. ...


	Stage 5 Generators:
		Fitting DropDuplicatesFeatureGenerator...
	Types of features in original data (raw dtype, special dtypes):
		('float', []) : 45 | ['Material=Steel', 'Material=Aluminum', 'Material=Titanium', 'SSB_Include', 'CSB_Include', ...]
	Types of features in processed data (raw dtype, special dtypes):
		('float', [])     : 40 | ['CS Length', 'BB Drop', 'Stack', 'SS E', 'ST Angle', ...]
		('int', ['bool']) :  5 | ['Material=Steel', 'Material=Aluminum', 'Material=Titanium', 'SSB_Include', 'CSB_Include']
	0.2s = Fit runtime
	45 features in original data used to generate 45 features in processed data.
	Train Data (Processed) Memory Usage: 3.62 MB (0.0% of available memory)
Data preprocessing and feature engineering runtime = 0.24s ...
AutoGluon will gauge predictive performance using evaluation metric: 'root_mean_squared_error'
	This metric's sign has been flipped to adhere to being higher_is_better. The metric score can be multiplied by -1 to get the metric value.
	To change t

[1000]	valid_set's rmse: 0.381511


	-0.3814	 = Validation score   (-root_mean_squared_error)
	37.44s	 = Training   runtime
	0.42s	 = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
	-0.334	 = Validation score   (-root_mean_squared_error)
	0.35s	 = Training   runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 208.6s ... Best model: "WeightedEnsemble_L2"
Deleting model KNeighborsUnif. All files under AutogluonModels/ag-20231016_092811/Predictor_Sim 3 Bottom Bracket X Rot./models/KNeighborsUnif/ will be removed.
Deleting model KNeighborsDist. All files under AutogluonModels/ag-20231016_092811/Predictor_Sim 3 Bottom Bracket X Rot./models/KNeighborsDist/ will be removed.
Deleting model LightGBM. All files under AutogluonModels/ag-20231016_092811/Predictor_Sim 3 Bottom Bracket X Rot./models/LightGBM/ will be removed.
Deleting model RandomForestMSE. All files under AutogluonModels/ag-20231016_092811/Predictor_Sim 3 Bottom Bracket X Rot./models/RandomForestMSE/ will be removed.
Deleting m

Fitting TabularPredictor for label: Sim 1 Safety Factor (Inverted) ...


		('int', ['bool']) :  5 | ['Material=Steel', 'Material=Aluminum', 'Material=Titanium', 'SSB_Include', 'CSB_Include']
	0.1s = Fit runtime
	46 features in original data used to generate 46 features in processed data.
	Train Data (Processed) Memory Usage: 3.71 MB (0.0% of available memory)
Data preprocessing and feature engineering runtime = 0.18s ...
AutoGluon will gauge predictive performance using evaluation metric: 'root_mean_squared_error'
	This metric's sign has been flipped to adhere to being higher_is_better. The metric score can be multiplied by -1 to get the metric value.
	To change this, specify the eval_metric parameter of Predictor()
Automatically generating train/validation split with holdout_frac=0.1, Train Rows: 10024, Val Rows: 1114
User-specified model hyperparameters to be fit:
{
	'NN_TORCH': {},
	'GBM': [{'extra_trees': True, 'ag_args': {'name_suffix': 'XT'}}, {}, 'GBMLarge'],
	'CAT': {},
	'XGB': {},
	'FASTAI': {},
	'RF': [{'criterion': 'gini', 'ag_args': {'name_suffi

[1000]	valid_set's rmse: 2.55636
[2000]	valid_set's rmse: 2.54448
[3000]	valid_set's rmse: 2.54223
[4000]	valid_set's rmse: 2.54138
[5000]	valid_set's rmse: 2.54082
[6000]	valid_set's rmse: 2.54058
[7000]	valid_set's rmse: 2.54046
[8000]	valid_set's rmse: 2.54039
[9000]	valid_set's rmse: 2.54029
[10000]	valid_set's rmse: 2.54031


	-2.5403	 = Validation score   (-root_mean_squared_error)
	43.65s	 = Training   runtime
	1.21s	 = Validation runtime
Fitting model: LightGBM ...
	Training LightGBM with GPU, note that this may negatively impact model quality compared to CPU training.


[1000]	valid_set's rmse: 2.53594


	-2.5352	 = Validation score   (-root_mean_squared_error)
	8.36s	 = Training   runtime
	0.23s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-2.575	 = Validation score   (-root_mean_squared_error)
	133.67s	 = Training   runtime
	0.17s	 = Validation runtime
Fitting model: CatBoost ...
	Training CatBoost with GPU, note that this may negatively impact model quality compared to CPU training.
	-2.5542	 = Validation score   (-root_mean_squared_error)
	4.21s	 = Training   runtime
	0.01s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-2.5704	 = Validation score   (-root_mean_squared_error)
	19.74s	 = Training   runtime
	0.16s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
	-1.8275	 = Validation score   (-root_mean_squared_error)
	11.7s	 = Training   runtime
	0.02s	 = Validation runtime
Fitting model: XGBoost ...
	-2.5669	 = Validation score   (-root_mean_squared_error)
	5.93s	 = Training   runtime
	0.14s	 = Validation runtime
Fitting model: NeuralNetTorch ...
	-2.

Fitting TabularPredictor for label: Sim 3 Safety Factor (Inverted) ...


	Stage 2 Generators:
		Fitting FillNaFeatureGenerator...
	Stage 3 Generators:
		Fitting IdentityFeatureGenerator...
	Stage 4 Generators:
		Fitting DropUniqueFeatureGenerator...
	Stage 5 Generators:
		Fitting DropDuplicatesFeatureGenerator...
	Types of features in original data (raw dtype, special dtypes):
		('float', []) : 47 | ['Material=Steel', 'Material=Aluminum', 'Material=Titanium', 'SSB_Include', 'CSB_Include', ...]
	Types of features in processed data (raw dtype, special dtypes):
		('float', [])     : 42 | ['CS Length', 'BB Drop', 'Stack', 'SS E', 'ST Angle', ...]
		('int', ['bool']) :  5 | ['Material=Steel', 'Material=Aluminum', 'Material=Titanium', 'SSB_Include', 'CSB_Include']
	0.2s = Fit runtime
	47 features in original data used to generate 47 features in processed data.
	Train Data (Processed) Memory Usage: 3.8 MB (0.0% of available memory)
Data preprocessing and feature engineering runtime = 0.26s ...
AutoGluon will gauge predictive performance using evaluation metric: 'r

[1000]	valid_set's rmse: 2.5472
[2000]	valid_set's rmse: 2.53887


	-2.538	 = Validation score   (-root_mean_squared_error)
	11.59s	 = Training   runtime
	0.26s	 = Validation runtime
Fitting model: LightGBM ...
	Training LightGBM with GPU, note that this may negatively impact model quality compared to CPU training.
	-2.499	 = Validation score   (-root_mean_squared_error)
	3.84s	 = Training   runtime
	0.05s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-2.4316	 = Validation score   (-root_mean_squared_error)
	113.01s	 = Training   runtime
	0.16s	 = Validation runtime
Fitting model: CatBoost ...
	Training CatBoost with GPU, note that this may negatively impact model quality compared to CPU training.
	-2.5951	 = Validation score   (-root_mean_squared_error)
	6.31s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-2.3696	 = Validation score   (-root_mean_squared_error)
	20.99s	 = Training   runtime
	0.27s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
No improvement since epoch 3: early stopping
	-0

Fitting TabularPredictor for label: Model Mass ...


	Stage 5 Generators:
		Fitting DropDuplicatesFeatureGenerator...
	Types of features in original data (raw dtype, special dtypes):
		('float', []) : 48 | ['Material=Steel', 'Material=Aluminum', 'Material=Titanium', 'SSB_Include', 'CSB_Include', ...]
	Types of features in processed data (raw dtype, special dtypes):
		('float', [])     : 43 | ['CS Length', 'BB Drop', 'Stack', 'SS E', 'ST Angle', ...]
		('int', ['bool']) :  5 | ['Material=Steel', 'Material=Aluminum', 'Material=Titanium', 'SSB_Include', 'CSB_Include']
	0.2s = Fit runtime
	48 features in original data used to generate 48 features in processed data.
	Train Data (Processed) Memory Usage: 3.89 MB (0.0% of available memory)
Data preprocessing and feature engineering runtime = 0.27s ...
AutoGluon will gauge predictive performance using evaluation metric: 'root_mean_squared_error'
	This metric's sign has been flipped to adhere to being higher_is_better. The metric score can be multiplied by -1 to get the metric value.
	To change t

[1000]	valid_set's rmse: 0.130339
[2000]	valid_set's rmse: 0.121286
[3000]	valid_set's rmse: 0.118726
[4000]	valid_set's rmse: 0.117482
[5000]	valid_set's rmse: 0.117009
[6000]	valid_set's rmse: 0.116717
[7000]	valid_set's rmse: 0.116537
[8000]	valid_set's rmse: 0.11647
[9000]	valid_set's rmse: 0.116422
[10000]	valid_set's rmse: 0.1164


	-0.1164	 = Validation score   (-root_mean_squared_error)
	42.36s	 = Training   runtime
	1.17s	 = Validation runtime
Fitting model: LightGBM ...
	Training LightGBM with GPU, note that this may negatively impact model quality compared to CPU training.


[1000]	valid_set's rmse: 0.137374
[2000]	valid_set's rmse: 0.13594
[3000]	valid_set's rmse: 0.135669
[4000]	valid_set's rmse: 0.135432
[5000]	valid_set's rmse: 0.135351
[6000]	valid_set's rmse: 0.135316
[7000]	valid_set's rmse: 0.135304
[8000]	valid_set's rmse: 0.135294
[9000]	valid_set's rmse: 0.135288
[10000]	valid_set's rmse: 0.135284


	-0.1353	 = Validation score   (-root_mean_squared_error)
	63.13s	 = Training   runtime
	1.21s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-0.2241	 = Validation score   (-root_mean_squared_error)
	80.58s	 = Training   runtime
	0.24s	 = Validation runtime
Fitting model: CatBoost ...
	Training CatBoost with GPU, note that this may negatively impact model quality compared to CPU training.
	-0.1399	 = Validation score   (-root_mean_squared_error)
	70.78s	 = Training   runtime
	0.02s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-0.2043	 = Validation score   (-root_mean_squared_error)
	16.71s	 = Training   runtime
	0.24s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
	-0.1258	 = Validation score   (-root_mean_squared_error)
	10.89s	 = Training   runtime
	0.02s	 = Validation runtime
Fitting model: XGBoost ...
	-0.1416	 = Validation score   (-root_mean_squared_error)
	7.2s	 = Training   runtime
	0.32s	 = Validation runtime
Fitting model: NeuralNetTorch ...
	-

[1000]	valid_set's rmse: 0.144699
[2000]	valid_set's rmse: 0.144461
[3000]	valid_set's rmse: 0.144442
[4000]	valid_set's rmse: 0.144439
[5000]	valid_set's rmse: 0.144439
[6000]	valid_set's rmse: 0.144438
[7000]	valid_set's rmse: 0.144438
[8000]	valid_set's rmse: 0.144438


	-0.1444	 = Validation score   (-root_mean_squared_error)
	164.99s	 = Training   runtime
	1.68s	 = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
	-0.1045	 = Validation score   (-root_mean_squared_error)
	0.32s	 = Training   runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 499.85s ... Best model: "WeightedEnsemble_L2"
Deleting model KNeighborsUnif. All files under AutogluonModels/ag-20231016_092811/Predictor_Model Mass/models/KNeighborsUnif/ will be removed.
Deleting model KNeighborsDist. All files under AutogluonModels/ag-20231016_092811/Predictor_Model Mass/models/KNeighborsDist/ will be removed.
Deleting model RandomForestMSE. All files under AutogluonModels/ag-20231016_092811/Predictor_Model Mass/models/RandomForestMSE/ will be removed.
Deleting model CatBoost. All files under AutogluonModels/ag-20231016_092811/Predictor_Model Mass/models/CatBoost/ will be removed.
Deleting model ExtraTreesMSE. All files under AutogluonModels/ag-20231016_0

MultilabelPredictor saved to disk. Load with: MultilabelPredictor.load('AutogluonModels/ag-20231016_092811/')


In [9]:
# Reload the multi-label predictor from the directory reported by the training
# run above, then score its predictions on the held-out split.
# NOTE(review): the path embeds the timestamp of that specific training run;
# a fresh run will save to a different directory - confirm before re-running.
my_predictor = MultilabelPredictor.load("AutogluonModels/ag-20231016_092811/")
predictions = my_predictor.predict(x_test)

# All three metrics compare the same ground truth against the same predictions.
r2, mse, mae = (
    r2_score(y_true=y_test, y_pred=predictions),
    mean_squared_error(y_true=y_test, y_pred=predictions),
    mean_absolute_error(y_true=y_test, y_pred=predictions),
)

# Last expression of the cell: displayed as (r2, mse, mae).
r2, mse, mae

(0.7004860364064162, 0.180076212466393, 0.12404996870644322)

In [10]:
import shutil

# Bundle everything under the AutoGluon models directory into a single
# "trained-model.zip"; the archive path returned by make_archive is the
# cell's displayed output.
shutil.make_archive(
    base_name="trained-model",
    format="zip",
    root_dir="/content/AutogluonModels",
)


'/content/trained-model.zip'