# Get data

In [1]:
import pandas as pd
import json
from sklearn.datasets import load_iris

# Load the iris dataset and assemble one DataFrame:
# a column per measurement plus the class label in `target`.
iris = load_iris()
features = iris.feature_names
df = pd.DataFrame(iris.data, columns=features).assign(target=iris.target)

# Preview the first rows
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [4]:
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
import sklearn.pipeline
from goldilox import Pipeline

# Option 1 (recommended): wrap the unfitted sklearn pipeline in a goldilox
# Pipeline first, then fit through the goldilox wrapper.
sklearn_pipeline = sklearn.pipeline.Pipeline(
    [
        ('standar', StandardScaler()),
        ('classifier', LogisticRegression()),
    ]
)
pipeline = Pipeline.from_sklearn(sklearn_pipeline).fit(df[features], df['target'])

# Option 2: fit the sklearn pipeline directly, then wrap the fitted object.
# `raw` is derived from the feature frame and passed along as the raw example.
sklearn_pipeline = sklearn_pipeline.fit(df[features], df['target'])
raw = Pipeline.to_raw(df[features])
pipeline = Pipeline.from_sklearn(sklearn_pipeline, raw=raw)

# `pipeline` was rebound above, so the Option 2 object is the one checked here.
assert pipeline.validate()

Pipeline doesn't handle na for sepal length (cm)
Pipeline doesn't handle na for sepal width (cm)
Pipeline doesn't handle na for petal length (cm)
Pipeline doesn't handle na for petal width (cm)


The pipeline is ready to go, but it does not handle missing values, which we might encounter in production.  
Let's fix that!

In [5]:
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer

# Impute missing feature values with the column mean ahead of scaling,
# then rebuild and refit the pipeline with the imputer as its first step.
imputer = ColumnTransformer(
    [('features_mean', SimpleImputer(strategy='mean'), features)],
    remainder='passthrough',
)
sklearn_pipeline = sklearn.pipeline.Pipeline(
    [
        ('imputer', imputer),
        ('standar', StandardScaler()),
        ('classifier', LogisticRegression()),
    ]
)
pipeline = Pipeline.from_sklearn(sklearn_pipeline).fit(df[features], df['target'])
assert pipeline.validate()

In [7]:
pipeline.save('./pipeline.pkl')
print('Go to the fastapi docs here: http://127.0.0.1:5000/docs')
!gl serve 'pipeline.pkl'

Go to the fastapi docs here: http://127.0.0.1:5000/docs
[2021-11-26 12:10:04 +0100] [39494] [INFO] Starting gunicorn 20.1.0
[2021-11-26 12:10:04 +0100] [39494] [INFO] Listening at: http://127.0.0.1:5000 (39494)
[2021-11-26 12:10:04 +0100] [39494] [INFO] Using worker: uvicorn.workers.UvicornH11Worker
[2021-11-26 12:10:04 +0100] [39527] [INFO] Booting worker with pid: 39527
[2021-11-26 12:10:04 +0100] [39527] [INFO] Started server process [39527]
[2021-11-26 12:10:04 +0100] [39527] [INFO] Waiting for application startup.
[2021-11-26 12:10:04 +0100] [39527] [INFO] Application startup complete.
^C
[2021-11-26 12:10:21 +0100] [39494] [INFO] Handling signal: int
