This example trains a regression model on the dataset from Kaggle's [house prices prediction](https://www.kaggle.com/c/house-prices-advanced-regression-techniques) competition.

# Train a model

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from scipy.stats import skew

In [None]:
# Load the Kaggle training set; the path is relative to the notebook's working directory.
train_csv_path = "./train.csv"
train = pd.read_csv(train_csv_path)

In [None]:
# NOTE: this cell rewrites `train` in place, so running it twice without
# reloading the CSV applies the log transforms a second time.

# Log-transform the target; the model is trained and scored on log1p(SalePrice).
train["SalePrice"] = np.log1p(train["SalePrice"])

# Candidate columns for the skewness correction: every non-object column
# except the target, which was transformed above and must not be transformed twice.
numeric_feats = train.dtypes[train.dtypes != "object"].index
numeric_feats = numeric_feats[numeric_feats != "SalePrice"]

# Skewness of each candidate column, computed on its non-missing values.
skewed_feats = train[numeric_feats].apply(lambda x: skew(x.dropna()))
# Keep only the columns whose skewness exceeds 0.75.
skewed_feats = skewed_feats[skewed_feats > 0.75]
skewed_feats = skewed_feats.index

# Apply log1p to the selected feature columns.
train[skewed_feats] = np.log1p(train[skewed_feats])

In [None]:
# One-hot encode the non-numeric columns so that every column is usable by the model.
train = pd.get_dummies(data=train)

In [None]:
# Impute each missing value with the mean of its own column.
column_means = train.mean()
train = train.fillna(value=column_means)

In [None]:
# Build the inputs for scikit-learn: every column except the target is a feature.
features = list(train.columns[train.columns != "SalePrice"])

# Hold out 33% of the rows for testing; the fixed seed keeps the split reproducible.
split_frames = train_test_split(
    train[features],
    train["SalePrice"],
    test_size=0.33,
    random_state=88,
)
X_train, X_test, y_train, y_test = split_frames

In [None]:
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error

In [None]:
# Ridge regression hyper-parameters: regularisation strength and a fixed seed.
ridge_params = {"alpha": 10, "random_state": 88}
model_ridge = Ridge(**ridge_params).fit(X_train, y_train)

In [None]:
# Predict the (log-transformed) sale price for the held-out rows.
predictions = model_ridge.predict(X=X_test)

In [None]:
# Root-mean-squared error on the held-out split, in log1p(SalePrice) units.
# scikit-learn's signature is (y_true, y_pred), so the true values go first.
# The square root is taken explicitly because the `squared=False` flag was
# deprecated in scikit-learn 1.4 and removed in later releases.
float(np.sqrt(mean_squared_error(y_test, predictions)))

## Init DataContext

In [None]:
from trubrics.context import DataContext

In [None]:
# Re-attach the target column to each split so that every frame handed to the
# DataContext carries both the features and "SalePrice".
training_frame = X_train.assign(SalePrice=y_train)
testing_frame = X_test.assign(SalePrice=y_test)
# The minimum-functionality set is the first rows of a separately built test frame.
minimum_functionality_frame = X_test.assign(SalePrice=y_test).head()

data_context = DataContext(
    name="house_prices_dataset",
    version="0.0.1",
    target="SalePrice",
    training_data=training_frame,
    testing_data=testing_frame,
    minimum_functionality_data=minimum_functionality_frame,
)

## Build model validations

In [None]:
from trubrics.validations import ModelValidator

In [None]:
# Pair the fitted model with the data context; the validations below are methods of this object.
model_validator = ModelValidator(model=model_ridge, data=data_context)

In [None]:
# Metric name shared by both performance validations.
rmse_metric = "neg_root_mean_squared_error"

# Run the validations one by one, in the order they should appear in the trubric.
validations = []
validations.append(
    model_validator.validate_performance_against_threshold(metric=rmse_metric, threshold=-0.15)
)
validations.append(
    model_validator.validate_performance_between_train_and_test(metric=rmse_metric, threshold=0.3)
)
validations.append(
    model_validator.validate_minimum_functionality_in_range(range_value=0.1, range_inclusive=True)
)

## Group validations in a trubric

In [None]:
from trubrics.validations import Trubric

# Metadata identifying the trubric, the model, and the data context it was validated on.
trubric_metadata = dict(
    name="regression_trubric",
    model_name="my_model",
    model_version="0.0.1",
    data_context_name=data_context.name,
    data_context_version=data_context.version,
    tags=["nb-demo"],
)

# Bundle the validation results together with that metadata.
trubric = Trubric(validations=validations, **trubric_metadata)

## Save Locally

In [None]:
# Write the trubric to a .json file; the path is relative to the working directory.
trubric_path = "regression_trubric.json"
trubric.save_local(path=trubric_path)

## Save trubric to the Trubrics platform

First, we store our Trubrics credentials and project name in environment variables, then run `trubrics init` to connect to the platform:

In [None]:
import os
import getpass
# Ask for each setting in turn and export it as an environment variable.
# The password is read with getpass so that it is not echoed into the notebook.
for env_name, ask, prompt in (
    ("TRUBRICS_CONFIG_EMAIL", input, "Enter your Trubrics account email:"),
    ("TRUBRICS_CONFIG_PASSWORD", getpass.getpass, "Enter your password:"),
    ("TRUBRICS_PROJECT_NAME", input, "Enter your Trubrics project name:"),
):
    os.environ[env_name] = ask(prompt)

In [None]:
# Initialise the Trubrics CLI for the project named in TRUBRICS_PROJECT_NAME;
# the shell expands that variable from the environment set in the previous cell.
# NOTE(review): presumably the CLI also reads TRUBRICS_CONFIG_EMAIL and
# TRUBRICS_CONFIG_PASSWORD from the environment - confirm against the Trubrics docs.
! trubrics init --trubrics-user --project-name "$TRUBRICS_PROJECT_NAME"

Now we can save our `trubric` with the `save_ui()` method directly from our notebook, or from a Python script:

In [None]:
# Best-effort upload: a failure is reported in the cell output rather than
# raised, so the rest of the notebook can still run.
try:
    trubric.save_ui()
except Exception as save_error:
    print(f"Error in saving to the Trubrics platform:\n{save_error}")

or we can save it within an automated pipeline with the CLI:

In [None]:
# Run the `titanic-example-trubric` run context through the CLI. This is the
# example trubric, not the regression trubric built earlier in this notebook.
# NOTE(review): presumably --save-ui uploads the result to the platform and
# --trubric-output-file-path names the local output file - confirm with `trubrics run --help`.
! trubrics run \
  --save-ui \
  --run-context-path titanic-example-trubric \
  --trubric-output-file-path "cli_demo_trubric.json"

Note: this CLI demo runs our example trubric (`titanic-example-trubric`), not the trubric that we built and saved earlier in this tutorial.