In [1]:
import pickle
import pandas as pd
import numpy as np
import mlfoundry as mlf
from sklearn import datasets
from sklearn import svm
from sklearn.model_selection import train_test_split

2022-03-22 23:16:11.397 INFO    streamlit_gradio.networking: Hashes generated for all static assets.


# Loading Iris Dataset

In [2]:
iris = datasets.load_iris()
iris_frame = pd.DataFrame(iris.data, columns = iris.feature_names)

# Using MlFoundry APIs

## Creating run

In [3]:
mlf_api = mlf.get_client() # to save locally
mlf_run = mlf_api.create_run(project_name='iris-project')

2022-03-22 23:16:21.137 INFO    mlfoundry.mlfoundry_api: Run is created with id 79e3490ca6d54c978729d89003f8652e and name run_2022-03-22_17-46-21_utc


## Training the model

In [None]:
iris = datasets.load_iris()
iris_frame = pd.DataFrame(iris.data, columns = iris.feature_names)

In [4]:
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Model Training
clf = svm.SVC(gamma='scale', kernel='rbf', probability=True)
clf.fit(X, y)

SVC(probability=True)

## Logging the dataset

In [29]:
train_data = np.hstack([X_train, np.reshape(y_train, (-1,1))])
train_data = pd.DataFrame(train_data, columns=iris.feature_names + ["target"])

test_data = np.hstack([X_test, np.reshape(y_test, (-1,1))])
test_data = pd.DataFrame(test_data, columns=iris.feature_names + ["target"])

In [None]:
mlf_run.log_dataset(train_data, data_slice=mlf.DataSlice.TRAIN)
mlf_run.log_dataset(test_data, data_slice=mlf.DataSlice.TEST)

## Logging Parameters

In [6]:
params = {'classes': clf.classes_, 'features': clf.n_features_in_}
mlf_run.log_params(params)

2022-03-14 17:32:14.679 INFO    mlfoundry.mlfoundry_run: Parameters logged successfully


## Logging the model

In [7]:
mlf_run.log_model(clf, mlf.ModelFramework.SKLEARN)

2022-03-14 17:32:17.201 INFO    mlfoundry.mlfoundry_run: Model logged Successfully


## Logging Predictions Synchronously

In [8]:
y_hat_train = clf.predict(X_train)
y_hat_test = clf.predict(X_test)

## Logging the metrics

In [9]:
from sklearn.metrics import accuracy_score, f1_score
metrics_dict = {}

metrics_dict['accuracy_score'] = accuracy_score(y_test, y_hat_test)
metrics_dict['f1_score'] = f1_score(y_test, y_hat_test, average='micro')

mlf_run.log_metrics(metrics_dict)


2022-03-14 17:32:17.247 INFO    mlfoundry.mlfoundry_run: Metrics logged successfully


## Logging the Dataset Stats

In [10]:
import shap

y_train_prob = clf.predict_proba(X_train)


X_train_df = pd.DataFrame(X_train, columns=iris.feature_names)
X_train_df['targets'] = y_train
X_train_df['predictions'] = y_hat_train
X_train_df['prediction_probabilities'] = list(y_train_prob)

X_test_df = pd.DataFrame(X_test, columns=iris.feature_names)
X_test_df['targets'] = y_test
X_test_df['predictions'] = y_hat_test

# compute and log stats for train data without shap
mlf_run.log_dataset_stats(
    X_train_df, 
    data_slice=mlf.DataSlice.TRAIN,
    data_schema=mlf.Schema(
        feature_column_names=iris.feature_names,
        prediction_column_name="predictions",
        actual_column_name="targets",
        prediction_probability_column_name="prediction_probabilities"   # to calculate probability related metrics
    ),
    model_type=mlf.ModelType.MULTICLASS_CLASSIFICATION,
)

# shap value computation
X_train_df1 = pd.DataFrame(X_train, columns=iris.feature_names)
X_test_df1 = pd.DataFrame(X_test, columns=iris.feature_names)
explainer = shap.KernelExplainer(clf.predict_proba, X_train_df1)
shap_values = explainer.shap_values(X_test_df1)

mlf_run.log_dataset_stats(
    X_test_df, 
    data_slice=mlf.DataSlice.TEST,
    data_schema=mlf.Schema(
        feature_column_names=iris.feature_names,
        prediction_column_name="predictions",
        actual_column_name="targets"
    ),
    model_type=mlf.ModelType.MULTICLASS_CLASSIFICATION,
    shap_values=shap_values
)

Passing a set as an indexer is deprecated and will raise in a future version. Use a list instead.
2022-03-14 17:32:17.887 INFO    whylogs.app.config: No config file loaded
Passing a set as an indexer is deprecated and will raise in a future version. Use a list instead.
2022-03-14 17:32:17.944 INFO    mlfoundry.mlfoundry_run: Metrics logged successfully
2022-03-14 17:32:17.949 INFO    mlfoundry.mlfoundry_run: Dataset stats have been successfully computed and logged


WARN: Missing config


  0%|          | 0/30 [00:00<?, ?it/s]

2022-03-14 17:32:18.022 INFO    shap: num_full_subsets = 2
2022-03-14 17:32:18.046 INFO    shap: phi = [-0.00217723 -0.00464712 -0.29849855 -0.01793215]
2022-03-14 17:32:18.048 INFO    shap: phi = [-0.00706495  0.00843823  0.49916939  0.07344129]
2022-03-14 17:32:18.050 INFO    shap: phi = [ 0.00924218 -0.00379111 -0.20067084 -0.05550914]
2022-03-14 17:32:18.074 INFO    shap: num_full_subsets = 2
2022-03-14 17:32:18.094 INFO    shap: phi = [0.02461519 0.00605985 0.54740426 0.06116714]
2022-03-14 17:32:18.095 INFO    shap: phi = [-0.02814859 -0.00345984 -0.36545083  0.07703238]
2022-03-14 17:32:18.097 INFO    shap: phi = [ 0.0035334  -0.00260001 -0.18195343 -0.13819952]
2022-03-14 17:32:18.114 INFO    shap: num_full_subsets = 2
2022-03-14 17:32:18.138 INFO    shap: phi = [-0.00483364 -0.00720178 -0.29516366 -0.01554248]
2022-03-14 17:32:18.139 INFO    shap: phi = [0.00156978 0.00707897 0.55195269 0.08143252]
2022-03-14 17:32:18.141 INFO    shap: phi = [ 3.26385786e-03  1.22817549e-04 -2

2022-03-14 17:32:19.130 INFO    shap: phi = [0.01755822 0.00695546 0.58673526 0.03930359]
2022-03-14 17:32:19.132 INFO    shap: phi = [-0.0206625  -0.00281297 -0.39949493  0.09331009]
2022-03-14 17:32:19.133 INFO    shap: phi = [ 0.00310428 -0.00414249 -0.18724034 -0.13261368]
2022-03-14 17:32:19.153 INFO    shap: num_full_subsets = 2
2022-03-14 17:32:19.173 INFO    shap: phi = [0.0249655  0.00768371 0.57468038 0.04210709]
2022-03-14 17:32:19.174 INFO    shap: phi = [-0.03342953 -0.0034133  -0.38291301  0.09051274]
2022-03-14 17:32:19.175 INFO    shap: phi = [ 0.00846403 -0.00427042 -0.19176737 -0.13261983]
2022-03-14 17:32:19.193 INFO    shap: num_full_subsets = 2
2022-03-14 17:32:19.211 INFO    shap: phi = [-0.00538918 -0.01553959 -0.26650728 -0.03282906]
2022-03-14 17:32:19.212 INFO    shap: phi = [-0.00134396 -0.01593006  0.00122541 -0.24693775]
2022-03-14 17:32:19.213 INFO    shap: phi = [0.00673314 0.03146965 0.26528187 0.27976681]
2022-03-14 17:32:19.228 INFO    shap: num_full_s

In [11]:
with open('iris_sklearn_model.pkl', 'wb') as f:
    pickle.dump(clf, f)

In [19]:
%%writefile streamlit_iris.py
import pickle 
import random
import mlfoundry as mlf
import pandas as pd
import streamlit as st

def predict_fn(i1: float, i2: float, i3: float, i4: float) -> str:
    try:
        class_name_map = dict(
            zip([0, 1, 2], ["Iris Setosa", "Iris Versicolour", "Iris Virginica"])
        )
        features = pd.DataFrame([[i1, i2, i3, i4]])
        with open('iris_sklearn_model.pkl', 'rb') as f:
            model = pickle.load(f)
        model_output = model.predict(features)[0]
        return class_name_map[model_output]
    except Exception as e:
        print(f"Exception happened: {e}. Choosing random")
        return random.choice(["Iris Setosa", "Iris Versicolour", "Iris Virginica"])

mlf_api = mlf.get_client()
mlf_run = mlf_api.create_run(project_name="iris-project-webapp")
raw_in, raw_out = mlf_run.webapp(
    fn=predict_fn, inputs=["number", "number", "number", "number"], outputs="text"
)


with st.form(key="Model-Feedback", clear_on_submit=True):
    comments = [
                "Prediction looks good.", 
                "Prediction looks off.",  
                "Other comment."
               ]
    for comment in comments:
        val = st.checkbox(comment)

    st.write("Please enter more details")
    comment = st.text_input("Comment")
    form_submitted = st.form_submit_button(label="Submit Feedback")
    if form_submitted:
        print(f"User comment type: {val}. Comment: {comment}")
        st.success("Feedback Printed")

Overwriting streamlit_iris.py


In [13]:
mlf_run.log_webapp_file('streamlit_iris.py')

In [None]:
!mlfoundry ui 