In [None]:
!pip install autogluon==0.7.0


In [1]:
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split

In [2]:

from autogluon.tabular import TabularDataset, TabularPredictor
from autogluon.common.utils.utils import setup_outputdir
from autogluon.core.utils.loaders import load_pkl
from autogluon.core.utils.savers import save_pkl
import os.path

"""
@author: Lyle
"""

class MultilabelPredictor:
    """ Tabular Predictor for predicting multiple columns in table.
        Creates multiple TabularPredictor objects which you can also use individually.
        You can access the TabularPredictor for a particular label via: `multilabel_predictor.get_predictor(label_i)`

        Parameters
        ----------
        labels : List[str]
            The ith element of this list is the column (i.e. `label`) predicted by the ith TabularPredictor
            stored in this object.
        path : str, default = None
            Path to directory where models and intermediate outputs should be saved.
            If unspecified, a time-stamped folder called "AutogluonModels/ag-[TIMESTAMP]" will be created in the
            working directory to store all models.
            Note: To call `fit()` twice and save all results of each fit, you must specify different `path`
            locations or don't specify `path` at all. Otherwise files from first `fit()` will be overwritten by
            second `fit()`.
            Caution: when predicting many labels, this directory may grow large as it needs to store many
            TabularPredictors.
        problem_types : List[str], default = None
            The ith element is the `problem_type` for the ith TabularPredictor stored in this object.
        eval_metrics : List[str], default = None
            The ith element is the `eval_metric` for the ith TabularPredictor stored in this object.
        consider_labels_correlation : bool, default = True
            Whether the predictions of multiple labels should account for label correlations or predict each label
            independently of the others. If True, the ordering of `labels` may affect resulting accuracy as each
            label is predicted conditional on the previous labels appearing earlier in this list (i.e. in an
            auto-regressive fashion). Set to False if during inference you may want to individually use just the
            ith TabularPredictor without predicting all the other labels.
        kwargs :
            Arguments passed into the initialization of each TabularPredictor.
    """

    multi_predictor_file = 'multilabel_predictor.pkl'

    def __init__(self, labels, path=None, problem_types=None, eval_metrics=None, consider_labels_correlation=True,
                 **kwargs):
        # Directory the predictor was loaded from; stays None until `load()` sets it.
        self.model_root = None
        if len(labels) < 2:
            raise ValueError(
                "MultilabelPredictor is only intended for predicting MULTIPLE labels (columns), use TabularPredictor "
                "for predicting one label (column).")
        if (problem_types is not None) and (len(problem_types) != len(labels)):
            raise ValueError("If provided, `problem_types` must have same length as `labels`")
        if (eval_metrics is not None) and (len(eval_metrics) != len(labels)):
            raise ValueError("If provided, `eval_metrics` must have same length as `labels`")
        self.path = setup_outputdir(path, warn_if_exist=False)
        self.labels = labels
        self.consider_labels_correlation = consider_labels_correlation
        self.predictors = {}  # key = label, value = TabularPredictor or str path to the TabularPredictor for this label
        if eval_metrics is None:
            self.eval_metrics = {}
        else:
            self.eval_metrics = {labels[i]: eval_metrics[i] for i in range(len(labels))}
        problem_type = None
        eval_metric = None
        for i in range(len(labels)):
            label = labels[i]
            # Every label gets its own sub-directory below the shared output directory.
            path_i = self.path + "Predictor_" + label
            if problem_types is not None:
                problem_type = problem_types[i]
            if eval_metrics is not None:
                eval_metric = eval_metrics[i]
            self.predictors[label] = TabularPredictor(label=label, problem_type=problem_type, eval_metric=eval_metric,
                                                      path=path_i, **kwargs)

    def fit(self, train_data, tuning_data=None, **kwargs):
        """ Fits a separate TabularPredictor to predict each of the labels.

            Parameters
            ----------
            train_data, tuning_data : str or autogluon.tabular.TabularDataset or pd.DataFrame
                See documentation for `TabularPredictor.fit()`.
            kwargs :
                Arguments passed into the `fit()` call for each TabularPredictor.
        """
        if isinstance(train_data, str):
            train_data = TabularDataset(train_data)
        if isinstance(tuning_data, str):
            tuning_data = TabularDataset(tuning_data)
        # Keep untouched originals: every label drops a different set of columns from them.
        train_data_og = train_data.copy()
        tuning_data_og = tuning_data.copy() if tuning_data is not None else None
        # Only record the metrics chosen by the predictors when the caller did not supply any.
        save_metrics = len(self.eval_metrics) == 0
        for i, label in enumerate(self.labels):
            predictor = self.get_predictor(label)
            if not self.consider_labels_correlation:
                # Independent models: hide every other label.
                labels_to_drop = [l for l in self.labels if l != label]
            else:
                # Auto-regressive chain: earlier labels stay in as features, later ones are hidden.
                labels_to_drop = [self.labels[j] for j in range(i + 1, len(self.labels))]
            train_data = train_data_og.drop(labels_to_drop, axis=1)
            if tuning_data is not None:
                tuning_data = tuning_data_og.drop(labels_to_drop, axis=1)
            print(f"Fitting TabularPredictor for label: {label} ...")
            predictor.fit(train_data=train_data, tuning_data=tuning_data, **kwargs)
            # Keep only the path so the fitted model is not held in this object.
            self.predictors[label] = predictor.path
            if save_metrics:
                self.eval_metrics[label] = predictor.eval_metric
        self.save()

    def predict(self, data, **kwargs):
        """ Returns DataFrame with label columns containing predictions for each label.

            Parameters
            ----------
            data : str or autogluon.tabular.TabularDataset or pd.DataFrame
                Data to make predictions for. If label columns are present in this data, they will be ignored.
                See documentation for `TabularPredictor.predict()`.
            kwargs :
                Arguments passed into the predict() call for each TabularPredictor.
        """
        return self._predict(data, as_proba=False, **kwargs)

    def predict_proba(self, data, **kwargs):
        """ Returns dict where each key is a label and the corresponding value is the `predict_proba()` output for just that label.

            Parameters
            ----------
            data : str or autogluon.tabular.TabularDataset or pd.DataFrame
                Data to make predictions for. See documentation for `TabularPredictor.predict()` and `TabularPredictor.predict_proba()`.
            kwargs :
                Arguments passed into the `predict_proba()` call for each TabularPredictor (also passed into a `predict()` call).
        """
        return self._predict(data, as_proba=True, **kwargs)

    def evaluate(self, data, **kwargs):
        """ Returns dict where each key is a label and the corresponding value is the `evaluate()` output for just that label.

            Parameters
            ----------
            data : str or autogluon.tabular.TabularDataset or pd.DataFrame
                Data to evaluate predictions of all labels for, must contain all labels as columns. See documentation for `TabularPredictor.evaluate()`.
            kwargs :
                Arguments passed into the `evaluate()` call for each TabularPredictor (also passed into the `predict()` call).
        """
        data = self._get_data(data)
        eval_dict = {}
        for label in self.labels:
            print(f"Evaluating TabularPredictor for label: {label} ...")
            predictor = self.get_predictor(label)
            eval_dict[label] = predictor.evaluate(data, **kwargs)
            if self.consider_labels_correlation:
                # Later predictors in the chain are fed this label's prediction, not its true value.
                data[label] = predictor.predict(data, **kwargs)
        return eval_dict

    def save(self):
        """ Save MultilabelPredictor to disk. """
        # Replace live predictors by their paths so only lightweight metadata is pickled.
        for label in self.labels:
            if not isinstance(self.predictors[label], str):
                self.predictors[label] = self.predictors[label].path
        save_pkl.save(path=self.path + self.multi_predictor_file, object=self)
        print(f"MultilabelPredictor saved to disk. Load with: MultilabelPredictor.load('{self.path}')")

    @classmethod
    def load(cls, path):
        """ Load MultilabelPredictor from disk `path` previously specified when creating this MultilabelPredictor. """
        predictor_instance = load_pkl.load(path=os.path.join(path, cls.multi_predictor_file))
        # Remember where we were loaded from so per-label predictors resolve relative to this directory.
        predictor_instance.model_root = path
        return predictor_instance

    def get_predictor(self, label):
        """ Returns TabularPredictor which is used to predict this label.

            If only a path is stored for the label, the TabularPredictor is loaded from disk: from below
            `model_root` when this object came from `load()`, otherwise from the stored path itself.
        """
        predictor = self.predictors[label]
        if not isinstance(predictor, str):
            return predictor
        if self.model_root is None:
            # Not loaded from disk (e.g. right after `fit()`), so the stored path is still valid as-is.
            return TabularPredictor.load(path=predictor)
        # Keep only the final directory name, whether or not the stored path ends with a separator
        # and whichever separator it was written with.
        predictor_dir = predictor.replace("\\", "/").rstrip("/").rsplit("/", 1)[-1]
        # The empty last component makes the joined path end with a separator.
        return TabularPredictor.load(path=os.path.join(self.model_root, predictor_dir, ""))

    def _get_data(self, data):
        """ Returns `data` as a table that is safe to modify: loaded from file if a str, otherwise a copy. """
        if isinstance(data, str):
            return TabularDataset(data)
        return data.copy()

    def _predict(self, data, as_proba=False, **kwargs):
        """ Runs every label's predictor in order, writing each prediction into `data` before the next one runs.

            Returns `data[self.labels]` when `as_proba` is False, otherwise a dict mapping each label to its
            `predict_proba()` output.
        """
        data = self._get_data(data)
        predproba_dict = {}
        for label in self.labels:
            predictor = self.get_predictor(label)
            if as_proba:
                predproba_dict[label] = predictor.predict_proba(data, as_multiclass=True, **kwargs)
            data[label] = predictor.predict(data, **kwargs)
        if not as_proba:
            return data[self.labels]
        return predproba_dict


In [3]:
# Colab-only step: mount Google Drive at /content/drive so files stored there can be read by path.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# -*- coding: utf-8 -*-
"""
Created on Sun Jul 10 12:05:43 2022

@author: Lyle
"""

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

# CSV read by load_augmented_framed_dataset(); the path lies under the Google Drive mount point
# (/content/drive), so Drive must be mounted before loading.
ALL_STRUCTURAL_DATASET = "/content/drive/MyDrive/all_structural_data_aug.csv"


def one_hot_encode_material(data):
    """Replace the "Material" column with one indicator column per known material.

    Parameters
    ----------
    data : pd.DataFrame
        Frame containing a "Material" column. It is not modified.

    Returns
    -------
    pd.DataFrame
        New frame whose first three columns are "Material=Steel", "Material=Aluminum" and
        "Material=Titanium" (always present, always in this order), followed by the remaining
        columns of `data`. A material outside these three gets all-zero indicators.
    """
    # Build the categorical as a separate Series instead of assigning through `.loc[:, ...]`:
    # that assignment can leave the column as plain objects, and get_dummies would then emit
    # only the materials actually observed, in alphabetical order.
    material = data["Material"].astype(
        pd.CategoricalDtype(categories=["Steel", "Aluminum", "Titanium"])
    )
    mats_oh = pd.get_dummies(material, prefix="Material=", prefix_sep="")
    return pd.concat([mats_oh, data.drop(columns=["Material"])], axis=1)


def load_augmented_framed_dataset(dataset_path=None):
    """Load the structural dataset and return standardized features and targets.

    Column layout is positional: everything except the last 11 columns is treated as features,
    the next 10 columns are the targets, and the final column is not used.

    Parameters
    ----------
    dataset_path : str, default = None
        CSV file to read (first column is used as the index). Falls back to
        `ALL_STRUCTURAL_DATASET` when not given.

    Returns
    -------
    x : pd.DataFrame
        Features with "Material" one-hot encoded, then standardized.
    y : pd.DataFrame
        Targets after inverting the safety factors and taking absolute values of the remaining
        targets (columns renamed accordingly), then standardized.
    x_scaler, y_scaler : StandardScaler
        The scalers fitted on `x` and `y`, as returned by `scale()`.
    """
    if dataset_path is None:
        dataset_path = ALL_STRUCTURAL_DATASET
    reg_data = pd.read_csv(dataset_path, index_col=0)

    x = one_hot_encode_material(reg_data.iloc[:, :-11])
    x, x_scaler = scale(x)

    # Copy so the edits below act on an independent frame rather than on a slice of reg_data.
    y = reg_data.iloc[:, -11:-1].copy()

    inverted_cols = ['Sim 1 Safety Factor', 'Sim 3 Safety Factor']
    magnitude_cols = ['Sim 1 Dropout X Disp.', 'Sim 1 Dropout Y Disp.', 'Sim 1 Bottom Bracket X Disp.',
                      'Sim 1 Bottom Bracket Y Disp.', 'Sim 2 Bottom Bracket Z Disp.',
                      'Sim 3 Bottom Bracket Y Disp.', 'Sim 3 Bottom Bracket X Rot.', 'Model Mass']

    # Safety factors are replaced by their reciprocal; all other targets keep only their magnitude.
    y[inverted_cols] = 1 / y[inverted_cols]
    y[magnitude_cols] = y[magnitude_cols].abs()

    # Rename in one pass; column order is unchanged.
    new_names = {col: col + " (Inverted)" for col in inverted_cols}
    new_names.update({col: col + " Magnitude" for col in magnitude_cols})
    y = y.rename(columns=new_names)

    y, y_scaler = scale(y)

    return x, y, x_scaler, y_scaler


def scale(v):
    """Standardize every column of `v` with a freshly fitted StandardScaler.

    Returns a tuple `(scaled_frame, scaler)`: a DataFrame with the same index and columns as `v`
    holding the transformed values, and the fitted scaler itself.
    """
    fitted_scaler = StandardScaler()
    standardized = fitted_scaler.fit_transform(v)
    scaled_frame = pd.DataFrame(standardized, index=v.index, columns=v.columns)
    return scaled_frame, fitted_scaler

In [5]:
# Build the standardized feature/target frames (plus their fitted scalers), then hold out a test split.
# No test_size is passed, so train_test_split uses its default proportion; random_state pins the shuffle.
x_scaled, y_scaled, x_scaler, y_scaler = load_augmented_framed_dataset()
x_train, x_test, y_train, y_test = train_test_split(x_scaled, y_scaled, random_state=2023)


  data.loc[:, "Material"] = pd.Categorical(data["Material"], categories=["Steel", "Aluminum", "Titanium"])


In [8]:
# Sanity check: within each split, features and targets should have the same number of rows.
print(len(x_test), len(y_test))
len(x_train), len(y_train)

3713 3713


(11138, 11138)

In [9]:
# MultilabelPredictor.fit() takes a single table holding both features and label columns,
# so join the two training frames column-wise.
full_training_set = pd.concat([x_train, y_train], axis=1)
len(full_training_set)

11138

In [10]:
# Train one TabularPredictor per target column. consider_labels_correlation is left at its default (True),
# so each label's model also receives the labels that come before it in y_scaled.columns as features.
# No `path` is given, so models are written to a time-stamped AutogluonModels/ag-* directory.
my_predictor = MultilabelPredictor(labels=y_scaled.columns)
my_predictor.fit(
    train_data=full_training_set
)

Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels/ag-20230311_154304/Predictor_Sim 1 Dropout X Disp. Magnitude/"
AutoGluon Version:  0.7.0
Python Version:     3.9.16
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Sat Dec 10 16:00:40 UTC 2022
Train Data Rows:    11138
Train Data Columns: 39
Label Column: Sim 1 Dropout X Disp. Magnitude
Preprocessing data ...
AutoGluon infers your prediction problem is: 'regression' (because dtype of label-column == float and many unique label-values observed).
	Label info (max, min, mean, stddev): (14.664168867583603, -0.8695634632320092, -0.00307, 0.9914)
	If 'regression' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    11611

Fitting TabularPredictor for label: Sim 1 Dropout X Disp. Magnitude ...


	Types of features in original data (raw dtype, special dtypes):
		('float', []) : 39 | ['Material=Steel', 'Material=Aluminum', 'Material=Titanium', 'SSB_Include', 'CSB_Include', ...]
	Types of features in processed data (raw dtype, special dtypes):
		('float', [])     : 34 | ['CS Length', 'BB Drop', 'Stack', 'SS E', 'ST Angle', ...]
		('int', ['bool']) :  5 | ['Material=Steel', 'Material=Aluminum', 'Material=Titanium', 'SSB_Include', 'CSB_Include']
	0.2s = Fit runtime
	39 features in original data used to generate 39 features in processed data.
	Train Data (Processed) Memory Usage: 3.09 MB (0.0% of available memory)
Data preprocessing and feature engineering runtime = 0.2s ...
AutoGluon will gauge predictive performance using evaluation metric: 'root_mean_squared_error'
	This metric's sign has been flipped to adhere to being higher_is_better. The metric score can be multiplied by -1 to get the metric value.
	To change this, specify the eval_metric parameter of Predictor()
Automaticall

[1000]	valid_set's rmse: 0.356845


	-0.355	 = Validation score   (-root_mean_squared_error)
	9.55s	 = Training   runtime
	0.13s	 = Validation runtime
Fitting model: LightGBM ...
	-0.3369	 = Validation score   (-root_mean_squared_error)
	6.78s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-0.3869	 = Validation score   (-root_mean_squared_error)
	78.31s	 = Training   runtime
	0.16s	 = Validation runtime
Fitting model: CatBoost ...
	-0.35	 = Validation score   (-root_mean_squared_error)
	16.44s	 = Training   runtime
	0.01s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-0.4165	 = Validation score   (-root_mean_squared_error)
	19.08s	 = Training   runtime
	0.29s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
	-0.3167	 = Validation score   (-root_mean_squared_error)
	16.5s	 = Training   runtime
	0.02s	 = Validation runtime
Fitting model: XGBoost ...
	-0.3625	 = Validation score   (-root_mean_squared_error)
	5.86s	 = Training   runtime
	0.05s	 = Validation runtime
Fi

Fitting TabularPredictor for label: Sim 1 Dropout Y Disp. Magnitude ...


		('float', [])     : 35 | ['CS Length', 'BB Drop', 'Stack', 'SS E', 'ST Angle', ...]
		('int', ['bool']) :  5 | ['Material=Steel', 'Material=Aluminum', 'Material=Titanium', 'SSB_Include', 'CSB_Include']
	0.1s = Fit runtime
	40 features in original data used to generate 40 features in processed data.
	Train Data (Processed) Memory Usage: 3.17 MB (0.0% of available memory)
Data preprocessing and feature engineering runtime = 0.19s ...
AutoGluon will gauge predictive performance using evaluation metric: 'root_mean_squared_error'
	This metric's sign has been flipped to adhere to being higher_is_better. The metric score can be multiplied by -1 to get the metric value.
	To change this, specify the eval_metric parameter of Predictor()
Automatically generating train/validation split with holdout_frac=0.1, Train Rows: 10024, Val Rows: 1114
Fitting 11 L1 models ...
Fitting model: KNeighborsUnif ...
	-0.5667	 = Validation score   (-root_mean_squared_error)
	0.03s	 = Training   runtime
	0.08s	 = 

[1000]	valid_set's rmse: 0.457226
[2000]	valid_set's rmse: 0.451328
[3000]	valid_set's rmse: 0.449558
[4000]	valid_set's rmse: 0.449334
[5000]	valid_set's rmse: 0.449241
[6000]	valid_set's rmse: 0.449095
[7000]	valid_set's rmse: 0.449104
[8000]	valid_set's rmse: 0.449099
[9000]	valid_set's rmse: 0.449098
[10000]	valid_set's rmse: 0.449107


	-0.4491	 = Validation score   (-root_mean_squared_error)
	43.0s	 = Training   runtime
	0.97s	 = Validation runtime
Fitting model: LightGBM ...
	-0.3687	 = Validation score   (-root_mean_squared_error)
	4.76s	 = Training   runtime
	0.09s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-0.3978	 = Validation score   (-root_mean_squared_error)
	86.48s	 = Training   runtime
	0.16s	 = Validation runtime
Fitting model: CatBoost ...
	-0.3514	 = Validation score   (-root_mean_squared_error)
	51.41s	 = Training   runtime
	0.01s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-0.4065	 = Validation score   (-root_mean_squared_error)
	21.93s	 = Training   runtime
	0.29s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
	-0.2072	 = Validation score   (-root_mean_squared_error)
	15.94s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: XGBoost ...
	-0.3788	 = Validation score   (-root_mean_squared_error)
	7.16s	 = Training   runtime
	0.02s	 = Validation runtim

[1000]	valid_set's rmse: 0.398831
[2000]	valid_set's rmse: 0.398723
[3000]	valid_set's rmse: 0.398723


	-0.3987	 = Validation score   (-root_mean_squared_error)
	59.29s	 = Training   runtime
	0.7s	 = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
	-0.2072	 = Validation score   (-root_mean_squared_error)
	0.72s	 = Training   runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 320.01s ... Best model: "WeightedEnsemble_L2"
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("AutogluonModels/ag-20230311_154304/Predictor_Sim 1 Dropout Y Disp. Magnitude/")
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels/ag-20230311_154304/Predictor_Sim 1 Bottom Bracket X Disp. Magnitude/"
AutoGluon Version:  0.7.0
Python Version:     3.9.16
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Sat Dec 10 16:00:40 UTC 2022
Train Data Rows:    11138
Train Data Columns: 41
Label Column: Sim 1 Bottom Bracket X Disp. Magnitude
Preprocessing data ...
AutoGluon infers your prediction problem is: 'reg

Fitting TabularPredictor for label: Sim 1 Bottom Bracket X Disp. Magnitude ...


		Fitting DropUniqueFeatureGenerator...
	Types of features in original data (raw dtype, special dtypes):
		('float', []) : 41 | ['Material=Steel', 'Material=Aluminum', 'Material=Titanium', 'SSB_Include', 'CSB_Include', ...]
	Types of features in processed data (raw dtype, special dtypes):
		('float', [])     : 36 | ['CS Length', 'BB Drop', 'Stack', 'SS E', 'ST Angle', ...]
		('int', ['bool']) :  5 | ['Material=Steel', 'Material=Aluminum', 'Material=Titanium', 'SSB_Include', 'CSB_Include']
	0.2s = Fit runtime
	41 features in original data used to generate 41 features in processed data.
	Train Data (Processed) Memory Usage: 3.26 MB (0.0% of available memory)
Data preprocessing and feature engineering runtime = 0.24s ...
AutoGluon will gauge predictive performance using evaluation metric: 'root_mean_squared_error'
	This metric's sign has been flipped to adhere to being higher_is_better. The metric score can be multiplied by -1 to get the metric value.
	To change this, specify the eval_met

[1000]	valid_set's rmse: 0.124837


	-0.1248	 = Validation score   (-root_mean_squared_error)
	28.16s	 = Training   runtime
	0.29s	 = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
	-0.0435	 = Validation score   (-root_mean_squared_error)
	0.39s	 = Training   runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 406.83s ... Best model: "WeightedEnsemble_L2"
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("AutogluonModels/ag-20230311_154304/Predictor_Sim 1 Bottom Bracket X Disp. Magnitude/")
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels/ag-20230311_154304/Predictor_Sim 1 Bottom Bracket Y Disp. Magnitude/"
AutoGluon Version:  0.7.0
Python Version:     3.9.16
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Sat Dec 10 16:00:40 UTC 2022
Train Data Rows:    11138
Train Data Columns: 42
Label Column: Sim 1 Bottom Bracket Y Disp. Magnitude
Preprocessing data ...
AutoGluon infers your prediction problem 

Fitting TabularPredictor for label: Sim 1 Bottom Bracket Y Disp. Magnitude ...


	42 features in original data used to generate 42 features in processed data.
	Train Data (Processed) Memory Usage: 3.35 MB (0.0% of available memory)
Data preprocessing and feature engineering runtime = 0.19s ...
AutoGluon will gauge predictive performance using evaluation metric: 'root_mean_squared_error'
	This metric's sign has been flipped to adhere to being higher_is_better. The metric score can be multiplied by -1 to get the metric value.
	To change this, specify the eval_metric parameter of Predictor()
Automatically generating train/validation split with holdout_frac=0.1, Train Rows: 10024, Val Rows: 1114
Fitting 11 L1 models ...
Fitting model: KNeighborsUnif ...
	-0.4072	 = Validation score   (-root_mean_squared_error)
	0.03s	 = Training   runtime
	0.07s	 = Validation runtime
Fitting model: KNeighborsDist ...
	-0.4014	 = Validation score   (-root_mean_squared_error)
	0.03s	 = Training   runtime
	0.09s	 = Validation runtime
Fitting model: LightGBMXT ...


[1000]	valid_set's rmse: 0.42825
[2000]	valid_set's rmse: 0.424594
[3000]	valid_set's rmse: 0.424182


	-0.4241	 = Validation score   (-root_mean_squared_error)
	16.21s	 = Training   runtime
	0.45s	 = Validation runtime
Fitting model: LightGBM ...
	-0.3329	 = Validation score   (-root_mean_squared_error)
	5.19s	 = Training   runtime
	0.07s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-0.2823	 = Validation score   (-root_mean_squared_error)
	84.05s	 = Training   runtime
	0.28s	 = Validation runtime
Fitting model: CatBoost ...
	-0.3371	 = Validation score   (-root_mean_squared_error)
	9.76s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-0.2986	 = Validation score   (-root_mean_squared_error)
	19.31s	 = Training   runtime
	0.28s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
	-0.1444	 = Validation score   (-root_mean_squared_error)
	15.08s	 = Training   runtime
	0.05s	 = Validation runtime
Fitting model: XGBoost ...
	-0.353	 = Validation score   (-root_mean_squared_error)
	8.39s	 = Training   runtime
	0.03s	 = Validation runtime


[1000]	valid_set's rmse: 0.296257


	-0.2962	 = Validation score   (-root_mean_squared_error)
	31.63s	 = Training   runtime
	0.34s	 = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
	-0.1439	 = Validation score   (-root_mean_squared_error)
	0.41s	 = Training   runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 219.75s ... Best model: "WeightedEnsemble_L2"
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("AutogluonModels/ag-20230311_154304/Predictor_Sim 1 Bottom Bracket Y Disp. Magnitude/")
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels/ag-20230311_154304/Predictor_Sim 2 Bottom Bracket Z Disp. Magnitude/"
AutoGluon Version:  0.7.0
Python Version:     3.9.16
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Sat Dec 10 16:00:40 UTC 2022
Train Data Rows:    11138
Train Data Columns: 43
Label Column: Sim 2 Bottom Bracket Z Disp. Magnitude
Preprocessing data ...
AutoGluon infers your prediction problem 

Fitting TabularPredictor for label: Sim 2 Bottom Bracket Z Disp. Magnitude ...


Data preprocessing and feature engineering runtime = 0.18s ...
AutoGluon will gauge predictive performance using evaluation metric: 'root_mean_squared_error'
	This metric's sign has been flipped to adhere to being higher_is_better. The metric score can be multiplied by -1 to get the metric value.
	To change this, specify the eval_metric parameter of Predictor()
Automatically generating train/validation split with holdout_frac=0.1, Train Rows: 10024, Val Rows: 1114
Fitting 11 L1 models ...
Fitting model: KNeighborsUnif ...
	-0.5513	 = Validation score   (-root_mean_squared_error)
	0.03s	 = Training   runtime
	0.06s	 = Validation runtime
Fitting model: KNeighborsDist ...
	-0.5319	 = Validation score   (-root_mean_squared_error)
	0.03s	 = Training   runtime
	0.07s	 = Validation runtime
Fitting model: LightGBMXT ...


[1000]	valid_set's rmse: 0.263438
[2000]	valid_set's rmse: 0.258474
[3000]	valid_set's rmse: 0.2562
[4000]	valid_set's rmse: 0.255452
[5000]	valid_set's rmse: 0.254999
[6000]	valid_set's rmse: 0.254737
[7000]	valid_set's rmse: 0.254693
[8000]	valid_set's rmse: 0.254678
[9000]	valid_set's rmse: 0.254656
[10000]	valid_set's rmse: 0.254631


	-0.2546	 = Validation score   (-root_mean_squared_error)
	43.12s	 = Training   runtime
	1.66s	 = Validation runtime
Fitting model: LightGBM ...
	-0.2639	 = Validation score   (-root_mean_squared_error)
	5.27s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-0.3525	 = Validation score   (-root_mean_squared_error)
	86.49s	 = Training   runtime
	0.16s	 = Validation runtime
Fitting model: CatBoost ...
	-0.1883	 = Validation score   (-root_mean_squared_error)
	221.57s	 = Training   runtime
	0.01s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-0.3359	 = Validation score   (-root_mean_squared_error)
	19.21s	 = Training   runtime
	0.17s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
	-0.2463	 = Validation score   (-root_mean_squared_error)
	11.59s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: XGBoost ...
	-0.2474	 = Validation score   (-root_mean_squared_error)
	31.79s	 = Training   runtime
	0.15s	 = Validation run

[1000]	valid_set's rmse: 0.313272
[2000]	valid_set's rmse: 0.312932
[3000]	valid_set's rmse: 0.31292
[4000]	valid_set's rmse: 0.31292
[5000]	valid_set's rmse: 0.31292
[6000]	valid_set's rmse: 0.312919
[7000]	valid_set's rmse: 0.312919


	-0.3129	 = Validation score   (-root_mean_squared_error)
	160.13s	 = Training   runtime
	1.35s	 = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
	-0.1617	 = Validation score   (-root_mean_squared_error)
	0.71s	 = Training   runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 625.27s ... Best model: "WeightedEnsemble_L2"
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("AutogluonModels/ag-20230311_154304/Predictor_Sim 2 Bottom Bracket Z Disp. Magnitude/")
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels/ag-20230311_154304/Predictor_Sim 3 Bottom Bracket Y Disp. Magnitude/"
AutoGluon Version:  0.7.0
Python Version:     3.9.16
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Sat Dec 10 16:00:40 UTC 2022
Train Data Rows:    11138
Train Data Columns: 44
Label Column: Sim 3 Bottom Bracket Y Disp. Magnitude
Preprocessing data ...
AutoGluon infers your prediction problem

Fitting TabularPredictor for label: Sim 3 Bottom Bracket Y Disp. Magnitude ...


	Stage 4 Generators:
		Fitting DropUniqueFeatureGenerator...
	Types of features in original data (raw dtype, special dtypes):
		('float', []) : 44 | ['Material=Steel', 'Material=Aluminum', 'Material=Titanium', 'SSB_Include', 'CSB_Include', ...]
	Types of features in processed data (raw dtype, special dtypes):
		('float', [])     : 39 | ['CS Length', 'BB Drop', 'Stack', 'SS E', 'ST Angle', ...]
		('int', ['bool']) :  5 | ['Material=Steel', 'Material=Aluminum', 'Material=Titanium', 'SSB_Include', 'CSB_Include']
	0.2s = Fit runtime
	44 features in original data used to generate 44 features in processed data.
	Train Data (Processed) Memory Usage: 3.53 MB (0.0% of available memory)
Data preprocessing and feature engineering runtime = 0.24s ...
AutoGluon will gauge predictive performance using evaluation metric: 'root_mean_squared_error'
	This metric's sign has been flipped to adhere to being higher_is_better. The metric score can be multiplied by -1 to get the metric value.
	To change this,

[1000]	valid_set's rmse: 0.375837
[2000]	valid_set's rmse: 0.368725
[3000]	valid_set's rmse: 0.367026
[4000]	valid_set's rmse: 0.366212
[5000]	valid_set's rmse: 0.365892
[6000]	valid_set's rmse: 0.365705
[7000]	valid_set's rmse: 0.365667
[8000]	valid_set's rmse: 0.365578
[9000]	valid_set's rmse: 0.365523
[10000]	valid_set's rmse: 0.36551


	-0.3655	 = Validation score   (-root_mean_squared_error)
	46.12s	 = Training   runtime
	1.53s	 = Validation runtime
Fitting model: LightGBM ...


[1000]	valid_set's rmse: 0.353126


	-0.352	 = Validation score   (-root_mean_squared_error)
	10.33s	 = Training   runtime
	0.24s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-0.3941	 = Validation score   (-root_mean_squared_error)
	97.03s	 = Training   runtime
	0.17s	 = Validation runtime
Fitting model: CatBoost ...
	-0.3511	 = Validation score   (-root_mean_squared_error)
	231.89s	 = Training   runtime
	0.02s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-0.4309	 = Validation score   (-root_mean_squared_error)
	17.13s	 = Training   runtime
	0.27s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
	-0.2992	 = Validation score   (-root_mean_squared_error)
	13.84s	 = Training   runtime
	0.02s	 = Validation runtime
Fitting model: XGBoost ...
	-0.3977	 = Validation score   (-root_mean_squared_error)
	23.64s	 = Training   runtime
	0.08s	 = Validation runtime
Fitting model: NeuralNetTorch ...
	-0.3927	 = Validation score   (-root_mean_squared_error)
	22.62s	 = Training   runtime
	0.04s	 = Validati

Fitting TabularPredictor for label: Sim 3 Bottom Bracket X Rot. Magnitude ...


	Types of features in original data (raw dtype, special dtypes):
		('float', []) : 45 | ['Material=Steel', 'Material=Aluminum', 'Material=Titanium', 'SSB_Include', 'CSB_Include', ...]
	Types of features in processed data (raw dtype, special dtypes):
		('float', [])     : 40 | ['CS Length', 'BB Drop', 'Stack', 'SS E', 'ST Angle', ...]
		('int', ['bool']) :  5 | ['Material=Steel', 'Material=Aluminum', 'Material=Titanium', 'SSB_Include', 'CSB_Include']
	0.2s = Fit runtime
	45 features in original data used to generate 45 features in processed data.
	Train Data (Processed) Memory Usage: 3.62 MB (0.0% of available memory)
Data preprocessing and feature engineering runtime = 0.23s ...
AutoGluon will gauge predictive performance using evaluation metric: 'root_mean_squared_error'
	This metric's sign has been flipped to adhere to being higher_is_better. The metric score can be multiplied by -1 to get the metric value.
	To change this, specify the eval_metric parameter of Predictor()
Automatical

[1000]	valid_set's rmse: 0.419921
[2000]	valid_set's rmse: 0.404316
[3000]	valid_set's rmse: 0.399848
[4000]	valid_set's rmse: 0.388487
[5000]	valid_set's rmse: 0.382981
[6000]	valid_set's rmse: 0.381903
[7000]	valid_set's rmse: 0.381617
[8000]	valid_set's rmse: 0.38153
[9000]	valid_set's rmse: 0.381497
[10000]	valid_set's rmse: 0.381501


	-0.3815	 = Validation score   (-root_mean_squared_error)
	68.74s	 = Training   runtime
	1.64s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-0.4025	 = Validation score   (-root_mean_squared_error)
	94.97s	 = Training   runtime
	0.16s	 = Validation runtime
Fitting model: CatBoost ...
	-0.3408	 = Validation score   (-root_mean_squared_error)
	240.27s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-0.4167	 = Validation score   (-root_mean_squared_error)
	19.23s	 = Training   runtime
	0.25s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
	-0.3552	 = Validation score   (-root_mean_squared_error)
	14.17s	 = Training   runtime
	0.02s	 = Validation runtime
Fitting model: XGBoost ...
	-0.3811	 = Validation score   (-root_mean_squared_error)
	10.25s	 = Training   runtime
	0.07s	 = Validation runtime
Fitting model: NeuralNetTorch ...
	-0.3852	 = Validation score   (-root_mean_squared_error)
	32.25s	 = Training   runtime
	0.07s	 = Validat

Fitting TabularPredictor for label: Sim 1 Safety Factor (Inverted) ...


	Stage 3 Generators:
		Fitting IdentityFeatureGenerator...
	Stage 4 Generators:
		Fitting DropUniqueFeatureGenerator...
	Types of features in original data (raw dtype, special dtypes):
		('float', []) : 46 | ['Material=Steel', 'Material=Aluminum', 'Material=Titanium', 'SSB_Include', 'CSB_Include', ...]
	Types of features in processed data (raw dtype, special dtypes):
		('float', [])     : 41 | ['CS Length', 'BB Drop', 'Stack', 'SS E', 'ST Angle', ...]
		('int', ['bool']) :  5 | ['Material=Steel', 'Material=Aluminum', 'Material=Titanium', 'SSB_Include', 'CSB_Include']
	0.2s = Fit runtime
	46 features in original data used to generate 46 features in processed data.
	Train Data (Processed) Memory Usage: 3.71 MB (0.0% of available memory)
Data preprocessing and feature engineering runtime = 0.26s ...
AutoGluon will gauge predictive performance using evaluation metric: 'root_mean_squared_error'
	This metric's sign has been flipped to adhere to being higher_is_better. The metric score can be

[1000]	valid_set's rmse: 2.56812
[2000]	valid_set's rmse: 2.56246
[3000]	valid_set's rmse: 2.56176
[4000]	valid_set's rmse: 2.56153


	-2.5613	 = Validation score   (-root_mean_squared_error)
	22.18s	 = Training   runtime
	0.6s	 = Validation runtime
Fitting model: LightGBM ...
	-2.5875	 = Validation score   (-root_mean_squared_error)
	7.91s	 = Training   runtime
	0.13s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-2.6204	 = Validation score   (-root_mean_squared_error)
	142.09s	 = Training   runtime
	0.29s	 = Validation runtime
Fitting model: CatBoost ...
	-2.5992	 = Validation score   (-root_mean_squared_error)
	43.46s	 = Training   runtime
	0.01s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-2.5901	 = Validation score   (-root_mean_squared_error)
	20.57s	 = Training   runtime
	0.29s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
	-2.2301	 = Validation score   (-root_mean_squared_error)
	12.04s	 = Training   runtime
	0.02s	 = Validation runtime
Fitting model: XGBoost ...
	-2.5968	 = Validation score   (-root_mean_squared_error)
	11.25s	 = Training   runtime
	0.05s	 = Validation runt

Fitting TabularPredictor for label: Sim 3 Safety Factor (Inverted) ...


	Stage 3 Generators:
		Fitting IdentityFeatureGenerator...
	Stage 4 Generators:
		Fitting DropUniqueFeatureGenerator...
	Types of features in original data (raw dtype, special dtypes):
		('float', []) : 47 | ['Material=Steel', 'Material=Aluminum', 'Material=Titanium', 'SSB_Include', 'CSB_Include', ...]
	Types of features in processed data (raw dtype, special dtypes):
		('float', [])     : 42 | ['CS Length', 'BB Drop', 'Stack', 'SS E', 'ST Angle', ...]
		('int', ['bool']) :  5 | ['Material=Steel', 'Material=Aluminum', 'Material=Titanium', 'SSB_Include', 'CSB_Include']
	0.2s = Fit runtime
	47 features in original data used to generate 47 features in processed data.
	Train Data (Processed) Memory Usage: 3.8 MB (0.0% of available memory)
Data preprocessing and feature engineering runtime = 0.24s ...
AutoGluon will gauge predictive performance using evaluation metric: 'root_mean_squared_error'
	This metric's sign has been flipped to adhere to being higher_is_better. The metric score can be 

[1000]	valid_set's rmse: 2.60588
[2000]	valid_set's rmse: 2.59912
[3000]	valid_set's rmse: 2.59795
[4000]	valid_set's rmse: 2.59745
[5000]	valid_set's rmse: 2.59739


	-2.5973	 = Validation score   (-root_mean_squared_error)
	30.18s	 = Training   runtime
	0.65s	 = Validation runtime
Fitting model: LightGBM ...
	-2.5013	 = Validation score   (-root_mean_squared_error)
	4.74s	 = Training   runtime
	0.08s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-2.4389	 = Validation score   (-root_mean_squared_error)
	123.61s	 = Training   runtime
	0.16s	 = Validation runtime
Fitting model: CatBoost ...
	-2.5392	 = Validation score   (-root_mean_squared_error)
	14.41s	 = Training   runtime
	0.01s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-2.3529	 = Validation score   (-root_mean_squared_error)
	26.37s	 = Training   runtime
	0.16s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
No improvement since epoch 3: early stopping
	-0.6898	 = Validation score   (-root_mean_squared_error)
	8.91s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: XGBoost ...
	-2.4681	 = Validation score   (-root_mean_squared_error)
	10.76s	 =

Fitting TabularPredictor for label: Model Mass Magnitude ...


	Types of features in original data (raw dtype, special dtypes):
		('float', []) : 48 | ['Material=Steel', 'Material=Aluminum', 'Material=Titanium', 'SSB_Include', 'CSB_Include', ...]
	Types of features in processed data (raw dtype, special dtypes):
		('float', [])     : 43 | ['CS Length', 'BB Drop', 'Stack', 'SS E', 'ST Angle', ...]
		('int', ['bool']) :  5 | ['Material=Steel', 'Material=Aluminum', 'Material=Titanium', 'SSB_Include', 'CSB_Include']
	0.2s = Fit runtime
	48 features in original data used to generate 48 features in processed data.
	Train Data (Processed) Memory Usage: 3.89 MB (0.0% of available memory)
Data preprocessing and feature engineering runtime = 0.22s ...
AutoGluon will gauge predictive performance using evaluation metric: 'root_mean_squared_error'
	This metric's sign has been flipped to adhere to being higher_is_better. The metric score can be multiplied by -1 to get the metric value.
	To change this, specify the eval_metric parameter of Predictor()
Automatical

[1000]	valid_set's rmse: 0.130262
[2000]	valid_set's rmse: 0.121705
[3000]	valid_set's rmse: 0.119761
[4000]	valid_set's rmse: 0.118868
[5000]	valid_set's rmse: 0.118418
[6000]	valid_set's rmse: 0.118199
[7000]	valid_set's rmse: 0.1181
[8000]	valid_set's rmse: 0.118005
[9000]	valid_set's rmse: 0.117959
[10000]	valid_set's rmse: 0.117932


	-0.1179	 = Validation score   (-root_mean_squared_error)
	50.92s	 = Training   runtime
	1.44s	 = Validation runtime
Fitting model: LightGBM ...


[1000]	valid_set's rmse: 0.139065
[2000]	valid_set's rmse: 0.137905
[3000]	valid_set's rmse: 0.137671
[4000]	valid_set's rmse: 0.137545
[5000]	valid_set's rmse: 0.137443
[6000]	valid_set's rmse: 0.137409
[7000]	valid_set's rmse: 0.137388
[8000]	valid_set's rmse: 0.137382
[9000]	valid_set's rmse: 0.137379
[10000]	valid_set's rmse: 0.137375


	-0.1374	 = Validation score   (-root_mean_squared_error)
	70.46s	 = Training   runtime
	1.45s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-0.2172	 = Validation score   (-root_mean_squared_error)
	90.23s	 = Training   runtime
	0.28s	 = Validation runtime
Fitting model: CatBoost ...
	-0.1095	 = Validation score   (-root_mean_squared_error)
	242.05s	 = Training   runtime
	0.01s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-0.2049	 = Validation score   (-root_mean_squared_error)
	20.22s	 = Training   runtime
	0.16s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
	-0.1252	 = Validation score   (-root_mean_squared_error)
	13.05s	 = Training   runtime
	0.04s	 = Validation runtime
Fitting model: XGBoost ...
	-0.1423	 = Validation score   (-root_mean_squared_error)
	32.65s	 = Training   runtime
	0.14s	 = Validation runtime
Fitting model: NeuralNetTorch ...
	-0.1322	 = Validation score   (-root_mean_squared_error)
	43.27s	 = Training   runtime
	0.08s	 = Validat

[1000]	valid_set's rmse: 0.151324
[2000]	valid_set's rmse: 0.15108
[3000]	valid_set's rmse: 0.151059
[4000]	valid_set's rmse: 0.151057
[5000]	valid_set's rmse: 0.151056
[6000]	valid_set's rmse: 0.151056
[7000]	valid_set's rmse: 0.151056


	-0.1511	 = Validation score   (-root_mean_squared_error)
	185.56s	 = Training   runtime
	1.28s	 = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
	-0.1011	 = Validation score   (-root_mean_squared_error)
	0.4s	 = Training   runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 764.32s ... Best model: "WeightedEnsemble_L2"
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("AutogluonModels/ag-20230311_154304/Predictor_Model Mass Magnitude/")


MultilabelPredictor saved to disk. Load with: MultilabelPredictor.load('AutogluonModels/ag-20230311_154304/')


In [11]:
# Reload the MultilabelPredictor that the training cell saved to disk and
# predict every label for the rows in x_test.
my_predictor = MultilabelPredictor.load("AutogluonModels/ag-20230311_154304/")
predictions = my_predictor.predict(x_test)

# Score the predictions against y_test with each metric in turn; the metrics
# are evaluated in the order R^2, MSE, MAE and unpacked into matching names.
r2, mse, mae = (
    metric(y_test, predictions)
    for metric in (r2_score, mean_squared_error, mean_absolute_error)
)

# Last expression of the cell: shown as the (r2, mse, mae) tuple.
r2, mse, mae

(0.63394409450457, 0.21365510395757425, 0.12860334010343127)

In [12]:
import shutil

# Bundle the whole AutogluonModels directory into "trained-model-8pm.zip".
# The directory is given relative to the working directory -- the same way the
# predictor path is written when it is loaded -- instead of the Colab-only
# absolute "/content/AutogluonModels", so the cell also works when the
# notebook is not run from /content.
# make_archive returns the name of the archive it created, which is displayed
# as the cell's output.
shutil.make_archive(
    base_name="trained-model-8pm",
    format="zip",
    root_dir="AutogluonModels",
)


'/content/trained-model-8pm.zip'