# Reviewing a simple sklearn model

In [None]:
import os

# Point MLflow at the local tracking server for this process, then echo the
# value back so the notebook output shows which server is in use.
os.environ.update({"MLFLOW_TRACKING_URI": "http://localhost:5000"})
print(os.getenv("MLFLOW_TRACKING_URI"))

In [None]:
# Imports for the digits classification example
from sklearn import datasets, svm, metrics
from sklearn.model_selection import train_test_split
import mlflow
from sklearn.datasets import load_digits

In [None]:
# Load the digits dataset; the cells below read its `.images` and `.target`.
digits = datasets.load_digits()

In [None]:
# Flatten every image into a 1-D feature vector so that `data` is a 2-D
# matrix with one row per sample.
n_samples = digits.images.shape[0]
data = digits.images.reshape(n_samples, -1)

# Hold out half of the samples for testing. shuffle=False keeps the original
# ordering, so the first 50% become the train subset and the rest the test
# subset.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    digits.target,
    shuffle=False,
    test_size=0.5,
)

# Support vector classifier that will be fitted on the train subset.
clf = svm.SVC(gamma=0.003)

# Show the shapes of the four splits as a quick sanity check.
print(*(part.shape for part in (X_train, X_test, y_train, y_test)))

In [None]:
# Enable MLflow autologging for scikit-learn. This is done before fitting so
# that the training call below is what gets recorded.
# NOTE(review): exactly what is captured (params, metrics, model artifact)
# depends on the installed MLflow version -- confirm against the run in the UI.
mlflow.sklearn.autolog()

# Learn the digits on the train subset. The fit happens inside an explicit
# MLflow run so everything logged for this training lands in a single run,
# which is closed when the `with` block exits.
with mlflow.start_run():
    clf.fit(X_train, y_train)


## Artifact file
```
artifact_path: model
flavors:
  python_function:
    env: conda.yaml
    loader_module: mlflow.sklearn
    model_path: model.pkl
    python_version: 3.7.6
  sklearn:
    pickled_model: model.pkl
    serialization_format: cloudpickle
    sklearn_version: 0.22.2.post1
run_id: 57d4216eeea1499c8607b1d3f6265775
signature:
  inputs: '[{"type": "double"}, {"type": "double"}, {"type": "double"}, {"type": "double"},
    {"type": "double"}, {"type": "double"}, {"type": "double"}, {"type": "double"},
    {"type": "double"}, {"type": "double"}, {"type": "double"}, {"type": "double"},
    {"type": "double"}, {"type": "double"}, {"type": "double"}, {"type": "double"},
    {"type": "double"}, {"type": "double"}, {"type": "double"}, {"type": "double"},
    {"type": "double"}, {"type": "double"}, {"type": "double"}, {"type": "double"},
    {"type": "double"}, {"type": "double"}, {"type": "double"}, {"type": "double"}]'
  outputs: '[{"type": "long"}]'
utc_time_created: '2021-03-11 19:28:54.202276'
```

## Capturing input schema

In [None]:
import mlflow
import mlflow.sklearn
from mlflow.models import infer_signature
from sklearn.datasets import load_iris
from sklearn import tree

# Train a baseline decision tree on iris and log it to MLflow together with
# an explicitly inferred input/output signature.
# NOTE(review): the experiment name is kept verbatim, spelling included
# ("Siganture"), so runs keep landing in the same experiment -- rename
# deliberately if the typo should be fixed.
mlflow.set_experiment("Infer Siganture on DT")
with mlflow.start_run(run_name="dt_model_baseline") as run:
    iris = load_iris()

    # `fit` returns the estimator itself, so construction and training chain.
    sk_model = tree.DecisionTreeClassifier().fit(iris.data, iris.target)

    # Record the tree hyperparameters of interest as run params.
    for hyperparam in ("criterion", "splitter"):
        mlflow.log_param(hyperparam, getattr(sk_model, hyperparam))

    # Infer the signature from the training features and the model's own
    # predictions on them.
    predictions = sk_model.predict(iris.data)
    signature = infer_signature(iris.data, predictions)

    # Persist the model under the "sk_models" artifact path with the signature.
    mlflow.sklearn.log_model(sk_model, "sk_models", signature=signature)
