# Regression using Boston Data

In [1]:
import mlfoundry as mlf
import pandas as pd
import shap
from sklearn.datasets import load_boston
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

2022-04-06 00:28:38.789 INFO    streamlit_gradio.networking: Hashes generated for all static assets.


### Loading Data

In [2]:
# Pull the Boston housing bunch that ships with scikit-learn.
# NOTE(review): load_boston is deprecated in recent scikit-learn releases and
# absent from the newest ones -- confirm the pinned version before re-running.
boston = load_boston()

# Features go into a DataFrame so every column carries its feature name;
# the regression target stays as returned by the loader.
X = pd.DataFrame(boston['data'], columns=boston['feature_names'])
y = boston['target']

# A fixed seed keeps the train/test partition identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
)

### Creating mlfoundry runs

In [3]:
# Client obtained with no arguments -- this configuration saves runs locally.
mlf_api = mlf.get_client()

# Open a new run under the 'boston-project' project; every later logging
# call in this notebook goes through mlf_run.
mlf_run = mlf_api.create_run(
    project_name='boston-project',
)

2022-04-06 00:28:43.918 INFO    mlfoundry.mlfoundry_api: Run is created with id 3d6d1496f86f4554a11e0d5ca7f9d0a2 and name run_2022-04-05_18-58-43_utc


### Logging Dataset

In [4]:
# Log the feature frames for both splits (train first, then test), each
# tagged with its slice name.
for slice_name, slice_features in (("train", X_train), ("test", X_test)):
    mlf_run.log_dataset(slice_features, data_slice=slice_name)

### Training Model

In [5]:
# Seed the forest so the fitted model (and every metric logged from it) is
# reproducible under Restart & Run All; 42 matches the seed already used for
# the train/test split.
rf_reg = RandomForestRegressor(n_estimators=100, random_state=42)

# fit() stays the last expression so the cell displays the estimator repr.
rf_reg.fit(X_train, y_train)

RandomForestRegressor()

### Logging Parameters & Model, and Computing Predictions

In [6]:
# Record the estimator's hyperparameters, then the fitted model itself
# ("sklearn" is passed as the second argument to log_model).
mlf_run.log_params(rf_reg.get_params())
mlf_run.log_model(rf_reg, "sklearn")

# Compute predictions for both splits. Nothing is logged here; y_hat_test is
# consumed by the metrics and dataset-stats cells further down.
y_hat_train, y_hat_test = (
    rf_reg.predict(split_features) for split_features in (X_train, X_test)
)

2022-04-06 00:28:44.177 INFO    mlfoundry.mlfoundry_run: Parameters logged successfully
2022-04-06 00:28:46.218 INFO    mlfoundry.mlfoundry_run: Model logged Successfully


### Logging metrics

In [7]:
# r2_score is imported in the notebook's top import cell, so all dependencies
# are declared in one place.
# Score the model on the held-out test split only and collect the result in
# the name -> value mapping handed to log_metrics.
metrics_dict = {'r2_score': r2_score(y_test, y_hat_test)}

mlf_run.log_metrics(metrics_dict)

2022-04-06 00:28:46.254 INFO    mlfoundry.mlfoundry_run: Metrics logged successfully


### Logging Dataset Stats

In [8]:
# shap is already imported in the notebook's top import cell; no re-import here.

# Build the stats frame in one step: test features plus the ground-truth and
# predicted values. assign() returns a new frame, so X_test itself never gains
# the two extra columns and this cell is safe to re-run.
X_test_df = X_test.assign(targets=y_test, predictions=y_hat_test)

# SHAP values are computed on the feature-only frame (X_test), not on
# X_test_df, so the explainer sees exactly the columns the model was fit on.
explainer = shap.TreeExplainer(rf_reg)
shap_values = explainer.shap_values(X_test)

# The schema names which columns of X_test_df are features, predictions, and
# actuals; the column names must match the ones assigned above.
mlf_run.log_dataset_stats(
    X_test_df,
    data_slice="test",
    data_schema=mlf.Schema(
        feature_column_names=list(boston['feature_names']),
        prediction_column_name="predictions",
        actual_column_name="targets"
    ),
    model_type="regression",
    shap_values=shap_values
)

Passing a set as an indexer is deprecated and will raise in a future version. Use a list instead.
2022-04-06 00:28:46.816 INFO    whylogs.app.config: No config file loaded
Passing a set as an indexer is deprecated and will raise in a future version. Use a list instead.
2022-04-06 00:28:46.881 INFO    mlfoundry.mlfoundry_run: Metrics logged successfully
Passing a set as an indexer is deprecated and will raise in a future version. Use a list instead.
2022-04-06 00:28:46.896 INFO    mlfoundry.mlfoundry_run: Dataset stats have been successfully computed and logged


WARN: Missing config
