In [4]:
import pandas as pd
import numpy as np
import mlfoundry as mlf
from sklearn import datasets
from sklearn import svm
from sklearn.model_selection import train_test_split

2022-03-14 15:29:47.636 INFO    streamlit_gradio.networking: Hashes generated for all static assets.


# Loading Iris Dataset

In [9]:
iris = datasets.load_iris()
iris_frame = pd.DataFrame(iris.data, columns = iris.feature_names)

# Using MlFoundry APIs

## Creating run

In [10]:
mlf_api = mlf.get_client() # to save locally
mlf_run = mlf_api.create_run(project_name='sklearn-project')

2022-03-14 15:31:27.780 INFO    mlfoundry.mlfoundry_api: Run is created with id acd8130866d646ba9a4e5ea113dacfd8 and name run_2022-03-14_22:31:27_utc


## Logging the dataset

In [11]:
mlf_run.log_dataset(iris_frame, data_slice=mlf.DataSlice.TRAIN)  # saves in parquet format
mlf_run.log_dataset(iris_frame, data_slice=mlf.DataSlice.TEST, fileformat=mlf.FileFormat.CSV) # saves in csv format

## Training the model

In [12]:
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Model Training
clf = svm.SVC(gamma='scale', kernel='rbf', probability=True)
clf.fit(X, y)

SVC(probability=True)

## Logging Parameters

In [13]:
params = {'classes': clf.classes_, 'features': clf.n_features_in_}
mlf_run.log_params(params)

2022-03-14 15:31:29.714 INFO    mlfoundry.mlfoundry_run: Parameters logged successfully


## Logging the model

In [14]:
mlf_run.log_model(clf, mlf.ModelFramework.SKLEARN)

2022-03-14 15:31:34.490 INFO    mlfoundry.mlfoundry_run: Model logged Successfully


## Logging Predictions Synchronously

In [16]:
y_hat_train = clf.predict(X_train)
y_hat_test = clf.predict(X_test)

## Logging the metrics

In [18]:
from sklearn.metrics import accuracy_score, f1_score
metrics_dict = {}

metrics_dict['accuracy_score'] = accuracy_score(y_test, y_hat_test)
metrics_dict['f1_score'] = f1_score(y_test, y_hat_test, average='micro')

mlf_run.log_metrics(metrics_dict)


2022-03-14 15:32:06.005 INFO    mlfoundry.mlfoundry_run: Metrics logged successfully


## Logging the Dataset Stats

In [19]:
import shap

y_train_prob = clf.predict_proba(X_train)


X_train_df = pd.DataFrame(X_train, columns=iris.feature_names)
X_train_df['targets'] = y_train
X_train_df['predictions'] = y_hat_train
X_train_df['prediction_probabilities'] = list(y_train_prob)

X_test_df = pd.DataFrame(X_test, columns=iris.feature_names)
X_test_df['targets'] = y_test
X_test_df['predictions'] = y_hat_test

# compute and log stats for train data without shap
mlf_run.log_dataset_stats(
    X_train_df, 
    data_slice=mlf.DataSlice.TRAIN,
    data_schema=mlf.Schema(
        feature_column_names=iris.feature_names,
        prediction_column_name="predictions",
        actual_column_name="targets",
        prediction_probability_column_name="prediction_probabilities"   # to calculate probability related metrics
    ),
    model_type=mlf.ModelType.MULTICLASS_CLASSIFICATION,
)

# shap value computation
X_train_df1 = pd.DataFrame(X_train, columns=iris.feature_names)
X_test_df1 = pd.DataFrame(X_test, columns=iris.feature_names)
explainer = shap.KernelExplainer(clf.predict_proba, X_train_df1)
shap_values = explainer.shap_values(X_test_df1)

mlf_run.log_dataset_stats(
    X_test_df, 
    data_slice=mlf.DataSlice.TEST,
    data_schema=mlf.Schema(
        feature_column_names=iris.feature_names,
        prediction_column_name="predictions",
        actual_column_name="targets"
    ),
    model_type=mlf.ModelType.MULTICLASS_CLASSIFICATION,
    shap_values=shap_values
)

Passing a set as an indexer is deprecated and will raise in a future version. Use a list instead.
2022-03-14 15:32:12.653 INFO    whylogs.app.config: No config file loaded
Passing a set as an indexer is deprecated and will raise in a future version. Use a list instead.
2022-03-14 15:32:12.707 INFO    mlfoundry.mlfoundry_run: Metrics logged successfully
2022-03-14 15:32:12.710 INFO    mlfoundry.mlfoundry_run: Dataset stats have been successfully computed and logged


WARN: Missing config


  0%|          | 0/30 [00:00<?, ?it/s]

2022-03-14 15:32:12.787 INFO    shap: num_full_subsets = 2
2022-03-14 15:32:12.811 INFO    shap: phi = [-0.01186082 -0.00678627 -0.28938475 -0.01532609]
2022-03-14 15:32:12.812 INFO    shap: phi = [0.01860775 0.00662815 0.45994694 0.11093328]
2022-03-14 15:32:12.814 INFO    shap: phi = [-6.74692326e-03  1.58113289e-04 -1.70562190e-01 -9.56071879e-02]
2022-03-14 15:32:12.837 INFO    shap: num_full_subsets = 2
2022-03-14 15:32:12.861 INFO    shap: phi = [0.01664889 0.01463432 0.56927224 0.0450056 ]
2022-03-14 15:32:12.863 INFO    shap: phi = [-0.01811083 -0.00699037 -0.39424057  0.07716619]
2022-03-14 15:32:12.865 INFO    shap: phi = [ 0.00146194 -0.00764395 -0.17503167 -0.1221718 ]
2022-03-14 15:32:12.889 INFO    shap: num_full_subsets = 2
2022-03-14 15:32:12.915 INFO    shap: phi = [-0.0100717   0.00157264 -0.27178825 -0.04247354]
2022-03-14 15:32:12.916 INFO    shap: phi = [ 0.01220038  0.01409669 -0.15052694 -0.23148152]
2022-03-14 15:32:12.918 INFO    shap: phi = [-0.00212868 -0.015

2022-03-14 15:32:14.004 INFO    shap: phi = [0.02435905 0.0190511  0.53925178 0.04957901]
2022-03-14 15:32:14.005 INFO    shap: phi = [-0.02680524 -0.01095982 -0.35668498  0.06344874]
2022-03-14 15:32:14.007 INFO    shap: phi = [ 0.0024462  -0.00809128 -0.1825668  -0.11302774]
2022-03-14 15:32:14.030 INFO    shap: num_full_subsets = 2
2022-03-14 15:32:14.053 INFO    shap: phi = [0.02113613 0.0162493  0.55619032 0.04968873]
2022-03-14 15:32:14.054 INFO    shap: phi = [-0.02355404 -0.00857182 -0.38090714  0.07277155]
2022-03-14 15:32:14.056 INFO    shap: phi = [ 0.00241791 -0.00767748 -0.17528318 -0.12246028]
2022-03-14 15:32:14.078 INFO    shap: num_full_subsets = 2
2022-03-14 15:32:14.109 INFO    shap: phi = [ 0.00672703 -0.05824542 -0.1581745  -0.02500913]
2022-03-14 15:32:14.110 INFO    shap: phi = [-0.01425005  0.05654738  0.40318485  0.08331505]
2022-03-14 15:32:14.112 INFO    shap: phi = [ 0.00752303  0.00169805 -0.24501035 -0.05830592]
2022-03-14 15:32:14.134 INFO    shap: num_fu

In [None]:
!mlfoundry ui 

2022-03-14 15:32:39.220 INFO    streamlit_gradio.networking: Hashes generated for all static assets.
[32m[1mMlFoundry CLI[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8502[0m
[34m  Network URL: [0m[1mhttp://192.168.1.67:8502[0m
[0m
[34m[1m  For better performance, install the Watchdog module:[0m

  $ xcode-select --install
  $ pip install watchdog
            [0m
2022-03-14 15:32:50.949 Hashes generated for all static assets.
2022-03-14 15:32:52.728 Error in loading fpr, tpr. Error msg 'roc_curve'
2022-03-14 15:32:52.733 Error in loading precision, recall. Error msg 'precision_recall_curve'
2022-03-14 15:32:58.127 No config file loaded
WARN: Missing config

this method is deprecated in favour of `Styler.format(precision=..)`

2022-03-14 16:22:50.704 Error in loading fpr, tpr. Error msg 'roc_curve'
2022-03-14 16:22:50.716 Error in loading precision, recall. Error msg 'precision_recall_curve'

thi