In [70]:
import pickle
import pandas as pd
import numpy as np
import mlfoundry as mlf
from sklearn import datasets
from sklearn import svm
from sklearn.model_selection import train_test_split

# Loading Iris Dataset

In [71]:
iris = datasets.load_iris()
iris_frame = pd.DataFrame(iris.data, columns = iris.feature_names)

# Using MlFoundry APIs

## Creating run

In [72]:
mlf_api = mlf.get_client() # to save locally
mlf_run = mlf_api.create_run(project_name='iris-project')

2022-03-14 17:03:11.228 INFO    mlfoundry.mlfoundry_api: Run is created with id 5dac775beb264626840031af2183ae90 and name run_2022-03-15_00:03:11_utc


## Logging the dataset

In [73]:
mlf_run.log_dataset(iris_frame, data_slice=mlf.DataSlice.TRAIN)  # saves in parquet format
mlf_run.log_dataset(iris_frame, data_slice=mlf.DataSlice.TEST, fileformat=mlf.FileFormat.CSV) # saves in csv format

## Training the model

In [74]:
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Model Training
clf = svm.SVC(gamma='scale', kernel='rbf', probability=True)
clf.fit(X, y)

SVC(probability=True)

## Logging Parameters

In [75]:
params = {'classes': clf.classes_, 'features': clf.n_features_in_}
mlf_run.log_params(params)

2022-03-14 17:03:11.888 INFO    mlfoundry.mlfoundry_run: Parameters logged successfully


## Logging the model

In [76]:
mlf_run.log_model(clf, mlf.ModelFramework.SKLEARN)

2022-03-14 17:03:14.698 INFO    mlfoundry.mlfoundry_run: Model logged Successfully


## Logging Predictions Synchronously

In [77]:
y_hat_train = clf.predict(X_train)
y_hat_test = clf.predict(X_test)

## Logging the metrics

In [78]:
from sklearn.metrics import accuracy_score, f1_score
metrics_dict = {}

metrics_dict['accuracy_score'] = accuracy_score(y_test, y_hat_test)
metrics_dict['f1_score'] = f1_score(y_test, y_hat_test, average='micro')

mlf_run.log_metrics(metrics_dict)


2022-03-14 17:03:14.717 INFO    mlfoundry.mlfoundry_run: Metrics logged successfully


## Logging the Dataset Stats

In [79]:
import shap

y_train_prob = clf.predict_proba(X_train)


X_train_df = pd.DataFrame(X_train, columns=iris.feature_names)
X_train_df['targets'] = y_train
X_train_df['predictions'] = y_hat_train
X_train_df['prediction_probabilities'] = list(y_train_prob)

X_test_df = pd.DataFrame(X_test, columns=iris.feature_names)
X_test_df['targets'] = y_test
X_test_df['predictions'] = y_hat_test

# compute and log stats for train data without shap
mlf_run.log_dataset_stats(
    X_train_df, 
    data_slice=mlf.DataSlice.TRAIN,
    data_schema=mlf.Schema(
        feature_column_names=iris.feature_names,
        prediction_column_name="predictions",
        actual_column_name="targets",
        prediction_probability_column_name="prediction_probabilities"   # to calculate probability related metrics
    ),
    model_type=mlf.ModelType.MULTICLASS_CLASSIFICATION,
)

# shap value computation
X_train_df1 = pd.DataFrame(X_train, columns=iris.feature_names)
X_test_df1 = pd.DataFrame(X_test, columns=iris.feature_names)
explainer = shap.KernelExplainer(clf.predict_proba, X_train_df1)
shap_values = explainer.shap_values(X_test_df1)

mlf_run.log_dataset_stats(
    X_test_df, 
    data_slice=mlf.DataSlice.TEST,
    data_schema=mlf.Schema(
        feature_column_names=iris.feature_names,
        prediction_column_name="predictions",
        actual_column_name="targets"
    ),
    model_type=mlf.ModelType.MULTICLASS_CLASSIFICATION,
    shap_values=shap_values
)

Passing a set as an indexer is deprecated and will raise in a future version. Use a list instead.
Passing a set as an indexer is deprecated and will raise in a future version. Use a list instead.
2022-03-14 17:03:14.816 INFO    mlfoundry.mlfoundry_run: Metrics logged successfully
2022-03-14 17:03:14.822 INFO    mlfoundry.mlfoundry_run: Dataset stats have been successfully computed and logged


  0%|          | 0/30 [00:00<?, ?it/s]

2022-03-14 17:03:14.884 INFO    shap: num_full_subsets = 2
2022-03-14 17:03:14.908 INFO    shap: phi = [ 0.00249153 -0.01407185 -0.29194784 -0.02356216]
2022-03-14 17:03:14.910 INFO    shap: phi = [-0.03948572 -0.02669439  0.42593416 -0.0109125 ]
2022-03-14 17:03:14.912 INFO    shap: phi = [ 0.03699418  0.04076624 -0.13398632  0.03447467]
2022-03-14 17:03:14.936 INFO    shap: num_full_subsets = 2
2022-03-14 17:03:14.958 INFO    shap: phi = [0.02420082 0.00725375 0.5689506  0.04057953]
2022-03-14 17:03:14.960 INFO    shap: phi = [-0.03190268 -0.00364265 -0.38100665  0.0857868 ]
2022-03-14 17:03:14.961 INFO    shap: phi = [ 0.00770186 -0.0036111  -0.18794395 -0.12636633]
2022-03-14 17:03:14.986 INFO    shap: num_full_subsets = 2
2022-03-14 17:03:15.011 INFO    shap: phi = [-0.01478062 -0.00364772 -0.26554183 -0.04689305]
2022-03-14 17:03:15.013 INFO    shap: phi = [ 0.01880444  0.00846944 -0.15331498 -0.21499579]
2022-03-14 17:03:15.014 INFO    shap: phi = [-0.00402381 -0.00482172  0.418

2022-03-14 17:03:15.802 INFO    shap: num_full_subsets = 2
2022-03-14 17:03:15.823 INFO    shap: phi = [-0.010572   -0.02212778 -0.27650572 -0.02325036]
2022-03-14 17:03:15.824 INFO    shap: phi = [ 0.0188556  -0.02264182  0.45474438  0.07404428]
2022-03-14 17:03:15.825 INFO    shap: phi = [-0.00828359  0.0447696  -0.17823866 -0.05079392]
2022-03-14 17:03:15.845 INFO    shap: num_full_subsets = 2
2022-03-14 17:03:15.865 INFO    shap: phi = [-0.00229107 -0.00449167 -0.30562598 -0.01812251]
2022-03-14 17:03:15.867 INFO    shap: phi = [-0.00732368  0.00703542  0.48980803  0.07215164]
2022-03-14 17:03:15.868 INFO    shap: phi = [ 0.00961475 -0.00254376 -0.18418205 -0.05402913]
2022-03-14 17:03:15.887 INFO    shap: num_full_subsets = 2
2022-03-14 17:03:15.904 INFO    shap: phi = [-0.01240765 -0.00672481 -0.295552   -0.01596454]
2022-03-14 17:03:15.906 INFO    shap: phi = [0.01939355 0.00487543 0.45418843 0.1223521 ]
2022-03-14 17:03:15.907 INFO    shap: phi = [-0.0069859   0.00184938 -0.158

In [80]:
with open('iris_sklearn_model.pkl', 'wb') as f:
    pickle.dump(clf, f)

In [81]:
%%writefile streamlit_iris.py
import pickle 
import random
import mlfoundry as mlf
import pandas as pd

def predict_fn(i1: float, i2: float, i3: float, i4: float) -> str:
    try:
        class_name_map = dict(
            zip([0, 1, 2], ["Iris Setosa", "Iris Versicolour", "Iris Virginica"])
        )
        features = pd.DataFrame([[i1, i2, i3, i4]])
        with open('iris_sklearn_model.pkl', 'wb') as f:
            model = pickle.load(f)
        model_output = model.predict(features)[0]
        return class_name_map[model_output]
    except Exception as e:
        print(f"Exception happened: {e}. Choosing random")
        return random.choice(["Iris Setosa", "Iris Versicolour", "Iris Virginica"])

mlf_api = mlf.get_client()
mlf_run = mlf_api.create_run(project_name="iris-project-webapp")
raw_in, raw_out = mlf_run.webapp(
    fn=predict_fn, inputs=["number", "number", "number", "number"], outputs="text"
)

Overwriting streamlit_iris.py


In [82]:
mlf_run.log_webapp_file('streamlit_iris.py')

In [54]:
!mlfoundry ui 

2022-03-14 16:56:55.707 INFO    streamlit_gradio.networking: Hashes generated for all static assets.
[32m[1mMlFoundry CLI[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8502[0m
[34m  Network URL: [0m[1mhttp://192.168.1.67:8502[0m
[0m
[34m[1m  For better performance, install the Watchdog module:[0m

  $ xcode-select --install
  $ pip install watchdog
            [0m
2022-03-14 16:57:02.545 Hashes generated for all static assets.
2022-03-14 16:57:04.299 Run is created with id 8037871c80ef4785953c5fbda0f903bc and name run_2022-03-14_23:57:04_utc
2022-03-14 16:57:11.200 Run is created with id 89bb52e30042444a86635f2a1d97dbed and name run_2022-03-14_23:57:11_utc
2022-03-14 16:57:11.665 Webapp could not be generated. Error name 'random' is not defined
2022-03-14 16:57:11.665 Cannot execute streamlit /Users/cusgadmin/work/deepL/mlfoundry-examples/examples/sklearn/servicefoundry/getdirs/webapp/webapp/stre