In [13]:
import pickle
import pandas as pd
import numpy as np
import mlfoundry as mlf
from sklearn import datasets
from sklearn import svm
from sklearn.model_selection import train_test_split

# Loading Iris Dataset

In [14]:
# Load the Iris dataset through sklearn.datasets and wrap its measurement
# matrix in a DataFrame whose columns carry the feature names.
iris = datasets.load_iris()
iris_frame = pd.DataFrame(data=iris.data, columns=iris.feature_names)

# Using MlFoundry APIs

## Creating run

In [15]:
# Default client; per the original note on this call it saves locally.
mlf_api = mlf.get_client()
# Open a run under the "iris-project" project; the logging cells below use it.
mlf_run = mlf_api.create_run(project_name="iris-project")

2022-04-06 00:31:54.784 INFO    mlfoundry.mlfoundry_api: Run is created with id 220d6f4479dc4d5ba8b783fc07e30480 and name run_2022-04-05_19-01-54_utc


## Training the model

In [16]:
# Repeats the two calls from the earlier "Loading Iris Dataset" cell:
# rebinds `iris` and rebuilds the feature DataFrame from it.
iris = datasets.load_iris()
iris_frame = pd.DataFrame(data=iris.data, columns=iris.feature_names)

In [17]:
# Fixed seed so that the train/test split and the SVC's internal probability
# calibration give the same result on every Restart & Run All.
RANDOM_STATE = 42

X, y = iris.data, iris.target
# Hold out 20% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# Model Training
clf = svm.SVC(
    gamma='scale', kernel='rbf', probability=True, random_state=RANDOM_STATE
)
# Fit on the training split only. Fitting on the full (X, y) would let the
# model see the test rows and inflate every test metric computed afterwards.
clf.fit(X_train, y_train)

SVC(probability=True)

## Logging the dataset

In [18]:
# For each split, append the labels as a trailing "target" column to the
# feature matrix and label the columns with the Iris feature names.
train_data = pd.DataFrame(
    np.column_stack((X_train, y_train)),
    columns=[*iris.feature_names, "target"],
)

test_data = pd.DataFrame(
    np.column_stack((X_test, y_test)),
    columns=[*iris.feature_names, "target"],
)

In [19]:
# Log both split frames on the run, each tagged with its slice name
# ("train" first, then "test").
for _slice_name, _frame in {"train": train_data, "test": test_data}.items():
    mlf_run.log_dataset(_frame, data_slice=_slice_name)

## Logging Parameters

In [20]:
# Record two attributes of the fitted classifier as run parameters:
# its class labels and the number of input features it was fitted on.
params = dict(classes=clf.classes_, features=clf.n_features_in_)
mlf_run.log_params(params)

2022-04-06 00:33:20.289 INFO    mlfoundry.mlfoundry_run: Parameters logged successfully


## Logging the model

In [21]:
# Log the fitted classifier on the run. "sklearn" is the second positional
# argument -- presumably the model framework; confirm against the mlfoundry docs.
mlf_run.log_model(clf, "sklearn")

2022-04-06 00:33:23.770 INFO    mlfoundry.mlfoundry_run: Model logged Successfully


## Computing predictions

In [22]:
# Predicted class labels for the training split, then for the test split.
y_hat_train, y_hat_test = (clf.predict(split) for split in (X_train, X_test))

## Logging the metrics

In [23]:
from sklearn.metrics import accuracy_score, f1_score

# Score the test-split predictions and log both numbers on the run.
# Note: for single-label multiclass targets, micro-averaged F1 coincides
# with accuracy, so the two values are expected to match.
metrics_dict = {
    'accuracy_score': accuracy_score(y_test, y_hat_test),
    'f1_score': f1_score(y_test, y_hat_test, average='micro'),
}

mlf_run.log_metrics(metrics_dict)


2022-04-06 00:33:40.337 INFO    mlfoundry.mlfoundry_run: Metrics logged successfully


## Logging the Dataset Stats

In [24]:
import shap

# Per-row class-probability estimates for the training split.
y_train_prob = clf.predict_proba(X_train)

# Training frame: features, actual labels, predicted labels and, per row,
# the probability vector stored as one cell.
X_train_df = pd.DataFrame(X_train, columns=iris.feature_names).assign(
    targets=y_train,
    predictions=y_hat_train,
    prediction_probabilities=list(y_train_prob),
)

# Test frame: features, actual labels and predicted labels (no probabilities).
X_test_df = pd.DataFrame(X_test, columns=iris.feature_names).assign(
    targets=y_test,
    predictions=y_hat_test,
)

# Train slice: stats are logged without SHAP values. The probability column
# is declared in the schema so probability-based metrics can be calculated.
mlf_run.log_dataset_stats(
    X_train_df,
    data_slice="train",
    data_schema=mlf.Schema(
        feature_column_names=iris.feature_names,
        prediction_column_name="predictions",
        actual_column_name="targets",
        prediction_probability_column_name="prediction_probabilities",
    ),
    model_type="multiclass_classification",
)

# SHAP values: a KernelExplainer over predict_proba, using the training
# features as background data, evaluated on the test features.
X_train_df1 = pd.DataFrame(X_train, columns=iris.feature_names)
X_test_df1 = pd.DataFrame(X_test, columns=iris.feature_names)
explainer = shap.KernelExplainer(clf.predict_proba, X_train_df1)
shap_values = explainer.shap_values(X_test_df1)

# Test slice: stats are logged together with the SHAP values computed above.
mlf_run.log_dataset_stats(
    X_test_df,
    data_slice="test",
    data_schema=mlf.Schema(
        feature_column_names=iris.feature_names,
        prediction_column_name="predictions",
        actual_column_name="targets",
    ),
    model_type="multiclass_classification",
    shap_values=shap_values,
)

Passing a set as an indexer is deprecated and will raise in a future version. Use a list instead.
Passing a set as an indexer is deprecated and will raise in a future version. Use a list instead.
2022-04-06 00:34:13.928 INFO    mlfoundry.mlfoundry_run: Metrics logged successfully
2022-04-06 00:34:13.929 INFO    mlfoundry.mlfoundry_run: Dataset stats have been successfully computed and logged


  0%|          | 0/30 [00:00<?, ?it/s]

2022-04-06 00:34:13.968 INFO    shap: num_full_subsets = 2
2022-04-06 00:34:13.982 INFO    shap: phi = [-0.01231872 -0.0104041  -0.26925947 -0.03009148]
2022-04-06 00:34:13.983 INFO    shap: phi = [ 0.01450993 -0.00158278 -0.09976835 -0.19431223]
2022-04-06 00:34:13.984 INFO    shap: phi = [-0.00219121  0.01198688  0.36902782  0.2244037 ]
2022-04-06 00:34:13.998 INFO    shap: num_full_subsets = 2
2022-04-06 00:34:14.011 INFO    shap: phi = [-0.00396011 -0.02022687 -0.28188845 -0.01824229]
2022-04-06 00:34:14.012 INFO    shap: phi = [-0.00158819  0.00382546  0.57313894  0.07792017]
2022-04-06 00:34:14.013 INFO    shap: phi = [ 0.00554829  0.01640141 -0.29125049 -0.05967788]
2022-04-06 00:34:14.028 INFO    shap: num_full_subsets = 2
2022-04-06 00:34:14.041 INFO    shap: phi = [-0.01483173  0.00053047 -0.25977432 -0.04887407]
2022-04-06 00:34:14.043 INFO    shap: phi = [ 0.01908547  0.00979438 -0.14587018 -0.20835986]
2022-04-06 00:34:14.044 INFO    shap: phi = [-0.00425374 -0.01032485  0

2022-04-06 00:34:14.635 INFO    shap: phi = [0.03580909 0.02845399 0.46608411 0.08521751]
2022-04-06 00:34:14.637 INFO    shap: phi = [-0.04029048 -0.02073405 -0.27580619  0.05114555]
2022-04-06 00:34:14.638 INFO    shap: phi = [ 0.00448139 -0.00771993 -0.19027792 -0.13636305]
2022-04-06 00:34:14.657 INFO    shap: num_full_subsets = 2
2022-04-06 00:34:14.670 INFO    shap: phi = [0.01138828 0.01775528 0.57317796 0.04403183]
2022-04-06 00:34:14.673 INFO    shap: phi = [-0.01202141 -0.00866161 -0.3824053   0.09088371]
2022-04-06 00:34:14.675 INFO    shap: phi = [ 0.00063313 -0.00909366 -0.19077265 -0.13491554]
2022-04-06 00:34:14.694 INFO    shap: num_full_subsets = 2
2022-04-06 00:34:14.704 INFO    shap: phi = [0.01259246 0.02811289 0.57248795 0.03538966]
2022-04-06 00:34:14.705 INFO    shap: phi = [-0.01352057 -0.01332739 -0.37566827  0.08789571]
2022-04-06 00:34:14.706 INFO    shap: phi = [ 0.00092811 -0.0147855  -0.19681967 -0.12328537]
2022-04-06 00:34:14.719 INFO    shap: num_full_s

In [11]:
# Serialise the fitted classifier into the working directory. predict_fn in
# the streamlit_iris.py cell reads this same file name.
with open('iris_sklearn_model.pkl', 'wb') as model_file:
    pickle.dump(clf, model_file)

In [19]:
%%writefile streamlit_iris.py
import pickle 
import random
import mlfoundry as mlf
import pandas as pd
import streamlit as st

def predict_fn(i1: float, i2: float, i3: float, i4: float) -> str:
    """Return the Iris species name predicted for one set of four measurements.

    The pickled classifier is read from ``iris_sklearn_model.pkl`` on every
    call and applied to a single-row DataFrame built from the arguments.

    Args:
        i1: First feature value.
        i2: Second feature value.
        i3: Third feature value.
        i4: Fourth feature value.

    Returns:
        The species name mapped from the predicted class index (0, 1 or 2).
        If loading the model, predicting, or the lookup raises, the error is
        printed and a randomly chosen species name is returned instead.
    """
    species = ["Iris Setosa", "Iris Versicolour", "Iris Virginica"]
    try:
        index_to_species = dict(enumerate(species))
        row = pd.DataFrame([[i1, i2, i3, i4]])
        # NOTE: unpickling runs code stored in the file; only load trusted models.
        with open('iris_sklearn_model.pkl', 'rb') as model_file:
            classifier = pickle.load(model_file)
        predicted = classifier.predict(row)[0]
        return index_to_species[predicted]
    except Exception as e:
        print(f"Exception happened: {e}. Choosing random")
        return random.choice(species)

# Create a run under "iris-project-webapp" and register predict_fn as its
# web app: four "number" inputs (one per argument) and a "text" output.
mlf_api = mlf.get_client()
mlf_run = mlf_api.create_run(project_name="iris-project-webapp")
raw_in, raw_out = mlf_run.webapp(
    fn=predict_fn,
    inputs=["number"] * 4,
    outputs="text",
)


# Feedback form: three checkboxes for the kind of feedback, a free-text
# comment, and a submit button. The result is printed to stdout on submit.
with st.form(key="Model-Feedback", clear_on_submit=True):
    comments = [
        "Prediction looks good.",
        "Prediction looks off.",
        "Other comment.",
    ]
    # Keep the label of every ticked box. The previous loop overwrote a single
    # variable on each iteration, so only the last checkbox's True/False was
    # ever reported and the other selections were lost.
    selected_types = [label for label in comments if st.checkbox(label)]

    st.write("Please enter more details")
    comment = st.text_input("Comment")
    form_submitted = st.form_submit_button(label="Submit Feedback")
    if form_submitted:
        print(f"User comment type: {selected_types}. Comment: {comment}")
        st.success("Feedback Printed")

Overwriting streamlit_iris.py


In [13]:
# Log the generated Streamlit script on the run. `mlf_run` here is the
# notebook's own run: the %%writefile cell only writes streamlit_iris.py to
# disk and does not execute it, so the run created inside that script is not
# in scope.
mlf_run.log_webapp_file('streamlit_iris.py')

In [None]:
# Shell escape: runs the `mlfoundry ui` command-line entry point.
!mlfoundry ui 