In [1]:
# Install the packages this notebook imports; only mlflow is version-pinned (0.9.1).
!pip install pandas numpy scikit-learn mlflow==0.9.1

Collecting mlflow==0.9.1
[?25l  Downloading https://files.pythonhosted.org/packages/4f/8a/3713ce558aba91acf8495bf9e82961030858eb5fe3041c5c51186e89fc85/mlflow-0.9.1-py3-none-any.whl (11.8MB)
[K    100% |████████████████████████████████| 11.8MB 2.7MB/s eta 0:00:01
Collecting databricks-cli>=0.8.0 (from mlflow==0.9.1)
[?25l  Downloading https://files.pythonhosted.org/packages/5f/38/f83bc71c5e7351a03e8d44aaf04647d076bbf8f097e3f93b921704b7a74c/databricks_cli-0.8.7-py3-none-any.whl (82kB)
[K    100% |████████████████████████████████| 92kB 26.6MB/s ta 0:00:01
Collecting boto3>=1.7.12 (from mlflow==0.9.1)
[?25l  Downloading https://files.pythonhosted.org/packages/a6/1f/b272ead5ccc5370717f3c65ebd5092feab90e748db041bd96c565e7d1a72/boto3-1.9.169-py2.py3-none-any.whl (128kB)
[K    100% |████████████████████████████████| 133kB 34.1MB/s ta 0:00:01
Collecting click>=7.0 (from mlflow==0.9.1)
[?25l  Downloading https://files.pythonhosted.org/packages/fa/37/45185cb5abbc30d7257104c434fe0b07e5a195a

Collecting smmap2>=2.0.0 (from gitdb2>=2.0.0->gitpython>=2.1.0->mlflow==0.9.1)
  Downloading https://files.pythonhosted.org/packages/55/d2/866d45e3a121ee15a1dc013824d58072fd5c7799c9c34d01378eb262ca8f/smmap2-2.0.5-py2.py3-none-any.whl
Collecting nose (from nose-exclude>=0.5.0->mleap>=0.8.1->mlflow==0.9.1)
[?25l  Downloading https://files.pythonhosted.org/packages/15/d8/dd071918c040f50fa1cf80da16423af51ff8ce4a0f2399b7bf8de45ac3d9/nose-1.3.7-py3-none-any.whl (154kB)
[K    100% |████████████████████████████████| 163kB 35.2MB/s ta 0:00:01
[?25hBuilding wheels for collected packages: querystring-parser, simplejson, mleap, tabulate, nose-exclude
  Building wheel for querystring-parser (setup.py) ... [?25ldone
[?25h  Stored in directory: /home/jovyan/.cache/pip/wheels/ee/09/99/bf937e4f02788fa8b33dc5240842ba3977ba5c3c4ad4a115d7
  Building wheel for simplejson (setup.py) ... [?25ldone
[?25h  Stored in directory: /home/jovyan/.cache/pip/wheels/5d/1a/1e/0350bb3df3e74215cd91325344cc86c2c691f

In [4]:
# The data set used in this example is from http://archive.ics.uci.edu/ml/datasets/Wine+Quality
# P. Cortez, A. Cerdeira, F. Almeida, T. Matos and J. Reis.
# Modeling wine preferences by data mining from physicochemical properties. In Decision Support Systems, Elsevier, 47(4):547-553, 2009.

import os
import warnings
import sys

import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet

import mlflow
import mlflow.sklearn


def eval_metrics(actual, pred):
    """Score predictions against the observed values.

    Args:
        actual: Observed target values.
        pred: Predicted target values, aligned with ``actual``.

    Returns:
        A ``(rmse, mae, r2)`` tuple: the square root of the mean squared
        error, the mean absolute error, and the R^2 score, in that order.
    """
    squared_error = mean_squared_error(actual, pred)
    return (
        np.sqrt(squared_error),
        mean_absolute_error(actual, pred),
        r2_score(actual, pred),
    )

    
if __name__ == "__main__":
    # Suppress all warnings and seed NumPy's global random generator.
    warnings.filterwarnings("ignore")
    np.random.seed(40)

    # Point MLflow at the file-based store under the users directory and
    # set the active experiment by name.
    users_home = '/mnt/pipelineai/users'
    experiment_base_path = users_home + '/experiments'
    tracking_uri = 'file://' + experiment_base_path
    experiment_name = 'wine'
    mlflow.set_tracking_uri(tracking_uri)
    mlflow.set_experiment(experiment_name)

    # Load the wine-quality CSV; the path is relative to the working directory.
    wine_path = './wine-quality.csv'
    data = pd.read_csv(wine_path)

    # Partition the rows with train_test_split's default sizes
    # (a 0.75 / 0.25 train/test split).
    train, test = train_test_split(data)

    # "quality" is the column being predicted; all remaining columns are
    # used as features. The targets are kept as single-column frames.
    target_columns = ["quality"]
    train_x, train_y = train.drop(target_columns, axis=1), train[target_columns]
    test_x, test_y = test.drop(target_columns, axis=1), test[target_columns]

    # ElasticNet hyperparameters; both are logged with the run below.
    alpha, l1_ratio = 0.5, 0.5

    with mlflow.start_run():
        # Fit on the training rows, then score on the held-out rows.
        lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
        lr.fit(train_x, train_y)

        predicted_qualities = lr.predict(test_x)
        rmse, mae, r2 = eval_metrics(test_y, predicted_qualities)

        print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio))
        print("  RMSE: %s" % rmse)
        print("  MAE: %s" % mae)
        print("  R2: %s" % r2)

        # Record the hyperparameters, then the metrics, on the active run.
        for param_name, param_value in (("alpha", alpha), ("l1_ratio", l1_ratio)):
            mlflow.log_param(param_name, param_value)
        for metric_name, metric_value in (("rmse", rmse), ("r2", r2), ("mae", mae)):
            mlflow.log_metric(metric_name, metric_value)

        # Persist the fitted estimator as a run artifact named "model".
        mlflow.sklearn.log_model(lr, "model")

Elasticnet model (alpha=0.500000, l1_ratio=0.500000):
  RMSE: 0.82224284975954
  MAE: 0.6278761410160693
  R2: 0.12678721972772689


# Check the MLflow Pipelines Tab
![MLflow Pipeline](https://raw.githubusercontent.com/PipelineAI/site/master/assets/img/mlflow-pipeline.png)