# Regression using Boston Data

In [8]:
import mlfoundry as mlf
import pandas as pd
import shap
from sklearn.datasets import load_boston
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

### Loading Data

In [9]:
# Fetch the Boston housing dataset through scikit-learn's loader.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell needs an older scikit-learn -- confirm the pinned version.
boston = load_boston()

# Features as a DataFrame with one named column per feature; target kept as returned
X = pd.DataFrame(boston['data'], columns=boston['feature_names'])
y = boston['target']

# Fixed random_state so the train/test partition is the same on every run
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
)

### Creating an mlfoundry run

In [10]:
# Client obtained with default arguments (per the original note, this saves locally)
mlf_api = mlf.get_client()

# Open a new run under the 'boston-project' project; everything below logs to it
mlf_run = mlf_api.create_run(
    project_name='boston-project',
)

2022-03-14 15:44:26.574 INFO    mlfoundry.mlfoundry_api: Run is created with id 38e88c8852734292a928b45c869f19d4 and name run_2022-03-14_22:44:26_utc


### Logging Dataset

In [11]:
# Log the feature frames of both splits, each tagged with its data slice (train first, then test)
for features_df, slice_kind in (
    (X_train, mlf.DataSlice.TRAIN),
    (X_test, mlf.DataSlice.TEST),
):
    mlf_run.log_dataset(features_df, data_slice=slice_kind)

### Training Model

In [12]:
# Seed the forest so training is repeatable: without random_state every
# Restart & Run All fits a different model, which changes the logged metrics
# and SHAP values. 42 matches the seed already used for the train/test split.
rf_reg = RandomForestRegressor(n_estimators=100, random_state=42)
rf_reg.fit(X_train, y_train)

RandomForestRegressor()

### Logging Parameters & Model

In [13]:
# Record the estimator's hyper-parameters, then the fitted estimator itself
hyper_params = rf_reg.get_params()
mlf_run.log_params(hyper_params)
mlf_run.log_model(rf_reg, mlf.ModelFramework.SKLEARN)

# Compute predictions for both splits; nothing is logged in this step
y_hat_test = rf_reg.predict(X_test)
y_hat_train = rf_reg.predict(X_train)

2022-03-14 15:44:30.774 INFO    mlfoundry.mlfoundry_run: Parameters logged successfully
2022-03-14 15:44:32.781 INFO    mlfoundry.mlfoundry_run: Model logged Successfully


### Logging Metrics

In [14]:
# r2_score is imported in the notebook's import cell at the top.
# R^2 is computed on the held-out split: y_test against the test-set predictions.
metrics_dict = {'r2_score': r2_score(y_test, y_hat_test)}

mlf_run.log_metrics(metrics_dict)

2022-03-14 15:44:41.780 INFO    mlfoundry.mlfoundry_run: Metrics logged successfully


### Logging Dataset Stats

In [15]:
# shap is already imported in the notebook's first cell, so it is not re-imported here.

# Test features plus ground truth and predictions in a single frame.
# .assign returns a new DataFrame, so X_test itself is left untouched and
# re-running this cell always starts from the same input.
X_test_df = X_test.assign(targets=y_test, predictions=y_hat_test)

# SHAP values for the test features, via the tree-specific explainer
explainer = shap.TreeExplainer(rf_reg)
shap_values = explainer.shap_values(X_test)

# The schema names the feature, prediction and actual columns of X_test_df
mlf_run.log_dataset_stats(
    X_test_df,
    data_slice=mlf.DataSlice.TEST,
    data_schema=mlf.Schema(
        feature_column_names=list(boston['feature_names']),
        prediction_column_name="predictions",
        actual_column_name="targets",
    ),
    model_type=mlf.ModelType.REGRESSION,
    shap_values=shap_values,
)

Passing a set as an indexer is deprecated and will raise in a future version. Use a list instead.
2022-03-14 15:44:43.897 INFO    whylogs.app.config: No config file loaded
Passing a set as an indexer is deprecated and will raise in a future version. Use a list instead.
2022-03-14 15:44:43.999 INFO    mlfoundry.mlfoundry_run: Metrics logged successfully
Passing a set as an indexer is deprecated and will raise in a future version. Use a list instead.
2022-03-14 15:44:44.020 INFO    mlfoundry.mlfoundry_run: Dataset stats have been successfully computed and logged


WARN: Missing config
