In [1]:
from sklearn.datasets import load_wine
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier

import mlfoundry as mlf

2022-04-06 00:34:35.956 INFO    streamlit_gradio.networking: Hashes generated for all static assets.


## Data preprocessing

In [2]:
# Load scikit-learn's bundled wine dataset; the returned object is
# accessed both by key and by attribute (data.data, data.target, ...) below.
data = load_wine()
# Show which fields the dataset object exposes.
print(data.keys())

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names'])


In [3]:
# Build the DataFrame from the feature matrix and append the class labels
# as a trailing `target` column, all in one expression.
df = pd.DataFrame(data.data, columns=data.feature_names).assign(target=data.target)
# Preview the first five rows
df.head()

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline,target
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0,0
1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0,0
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0,0
3,14.37,1.95,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480.0,0
4,13.24,2.59,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735.0,0


## Creating MLF runs

In [4]:
# Client handle for the mlfoundry tracking API.
mlf_api = mlf.get_client()

# Two runs in the same project, so the two models trained later in this
# notebook can be tracked side by side.
PROJECT_NAME = 'wine-project'
mlf_run = mlf_api.create_run(project_name=PROJECT_NAME)
mlf_run_2 = mlf_api.create_run(project_name=PROJECT_NAME)

2022-04-06 00:34:47.036 INFO    mlfoundry.mlfoundry_api: Run is created with id 1cf204d949e3478283d1badc0feb3688 and name run_2022-04-05_19-04-46_utc
2022-04-06 00:34:47.125 INFO    mlfoundry.mlfoundry_api: Run is created with id fea33b1e91d54189b6c9bc32e81be0e5 and name run_2022-04-05_19-04-47_utc


In [5]:
from sklearn.model_selection import train_test_split

# Feature matrix as a DataFrame so the column names travel with the data.
X = pd.DataFrame(data.data, columns=data.feature_names)
# Class labels
y = data.target

# Hold out a test split (scikit-learn's default test_size is used).
# random_state is pinned so that the split -- and every metric logged from
# it -- is identical on each Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

## Model training

In [6]:
# Two random forests with different size/depth settings; each one is logged
# to its own MLF run further down.
# random_state is pinned because forest training is randomised (bootstrap
# sampling and per-split feature sub-sampling); without it the logged models
# and metrics change on every execution.
tree_clf = RandomForestClassifier(n_estimators=100, max_depth=15, random_state=42)
tree_clf.fit(X_train, y_train)

tree_clf_1 = RandomForestClassifier(n_estimators=150, max_depth=10, random_state=42)
tree_clf_1.fit(X_train, y_train)

RandomForestClassifier(max_depth=10, n_estimators=150)

## Logging parameters and models

In [7]:
# Log the hyperparameters by reading them back from the estimators rather
# than repeating the literals, so the logged values cannot drift from what
# was actually trained if the constructor arguments are edited.
mlf_run.log_params(
    {"n_estimators": tree_clf.n_estimators, "max_depth": tree_clf.max_depth}
)
mlf_run_2.log_params(
    {"n_estimators": tree_clf_1.n_estimators, "max_depth": tree_clf_1.max_depth}
)

# Attach each fitted model to its run, using the "sklearn" framework tag.
mlf_run.log_model(tree_clf, "sklearn")
mlf_run_2.log_model(tree_clf_1, "sklearn")

2022-04-06 00:34:58.084 INFO    mlfoundry.mlfoundry_run: Parameters logged successfully
2022-04-06 00:34:58.086 INFO    mlfoundry.mlfoundry_run: Parameters logged successfully
2022-04-06 00:34:59.809 INFO    mlfoundry.mlfoundry_run: Model logged Successfully
2022-04-06 00:35:01.090 INFO    mlfoundry.mlfoundry_run: Model logged Successfully


## Computing predictions

In [8]:
# Compute predictions on both splits with each forest (nothing is logged
# in this cell).
# First forest (tree_clf)
y_hat_train, y_hat_test = (tree_clf.predict(split) for split in (X_train, X_test))

# Second forest (tree_clf_1)
y_hat_train_tree, y_hat_test_tree = (
    tree_clf_1.predict(split) for split in (X_train, X_test)
)

## Logging metrics

In [9]:
from sklearn.metrics import accuracy_score, f1_score

# Score each model's test-set predictions and log the results to its own run.
# NOTE(review): for single-label multiclass targets, micro-averaged F1 should
# coincide with accuracy, so the two logged values are expected to be equal --
# confirm whether "macro" or "weighted" averaging was intended.
for run, test_predictions in ((mlf_run, y_hat_test), (mlf_run_2, y_hat_test_tree)):
    f1 = f1_score(y_test, test_predictions, average="micro")
    accuracy = accuracy_score(y_test, test_predictions)
    run.log_metrics({"f1": f1, "accuracy_score": accuracy})

2022-04-06 00:35:02.702 INFO    mlfoundry.mlfoundry_run: Metrics logged successfully
2022-04-06 00:35:02.708 INFO    mlfoundry.mlfoundry_run: Metrics logged successfully


## Logging dataset statistics (`log_dataset_stats`)

In [10]:
import shap

# For each (run, model, test predictions) triple: assemble the test frame,
# compute SHAP values for that model, and log the test-slice statistics to
# the matching run.
for run, model, test_predictions in (
    (mlf_run, tree_clf, y_hat_test),
    (mlf_run_2, tree_clf_1, y_hat_test_tree),
):
    # Test features plus the label / prediction columns named in the schema.
    X_test_df = X_test.copy()
    X_test_df['targets'] = list(y_test)
    X_test_df['predictions'] = list(test_predictions)
    X_test_df['prediction_probabilities'] = list(model.predict_proba(X_test))

    # SHAP values for this model on the test features
    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X_test)

    run.log_dataset_stats(
        X_test_df,
        data_slice="test",
        data_schema=mlf.Schema(
            feature_column_names=list(data.feature_names),
            prediction_column_name="predictions",
            actual_column_name="targets",
            prediction_probability_column_name='prediction_probabilities'
        ),
        shap_values=shap_values,
        model_type="multiclass_classification",
    )

Passing a set as an indexer is deprecated and will raise in a future version. Use a list instead.
2022-04-06 00:35:24.650 INFO    whylogs.app.config: No config file loaded
Passing a set as an indexer is deprecated and will raise in a future version. Use a list instead.
2022-04-06 00:35:24.688 INFO    mlfoundry.mlfoundry_run: Metrics logged successfully
Passing a set as an indexer is deprecated and will raise in a future version. Use a list instead.
2022-04-06 00:35:24.704 INFO    mlfoundry.mlfoundry_run: Dataset stats have been successfully computed and logged
Passing a set as an indexer is deprecated and will raise in a future version. Use a list instead.
Passing a set as an indexer is deprecated and will raise in a future version. Use a list instead.
2022-04-06 00:35:24.777 INFO    mlfoundry.mlfoundry_run: Metrics logged successfully
Passing a set as an indexer is deprecated and will raise in a future version. Use a list instead.
2022-04-06 00:35:24.792 INFO    mlfoundry.mlfoundry_ru

WARN: Missing config
