In [3]:
import vaex
import warnings
import  mlflow 
from goldilox.datasets import load_iris
from vaex.ml.sklearn import Predictor
from sklearn.linear_model import LogisticRegression

# Silence all warnings for the rest of the session.
warnings.filterwarnings("ignore")

# Load the iris data, convert it to a vaex DataFrame and split it.
df, features, target = load_iris()
iris_vaex = vaex.from_pandas(df)
train, test = iris_vaex.ml.train_test_split()

# Wrap the scikit-learn estimator in a vaex Predictor; predictions are
# written to a column named 'lgbm'.
model = Predictor(
    model=LogisticRegression(),
    features=features,
    target=target,
    prediction_name='lgbm',
)

# Fit inside an MLflow run so the metrics computed on the test split are
# recorded against `run` (metric names are prefixed with "val_").
with mlflow.start_run() as run:
    model.fit(train)
    metrics = mlflow.sklearn.eval_and_log_metrics(
        model.model,
        test[features],
        test[target].values,
        prefix="val_",
    )

# Apply the fitted predictor to the training frame and preview two rows.
train = model.transform(train)
print(train.head(2))



  #    sepal_length    sepal_width    petal_length    petal_width    target    lgbm
  0             4.8            3.1             1.6            0.2         0       0
  1             5.4            3.4             1.5            0.4         0       0


In [4]:
from goldilox import Pipeline

def fetch_logged_data(run_id):
    """Collect the params, metrics and non-MLflow tags recorded for a run.

    Args:
        run_id: Identifier of the run, passed to ``MlflowClient.get_run``.

    Returns:
        dict with three entries: ``"params"`` (the run's params),
        ``"metrices"`` (the run's metrics) and ``"tags"`` (the run's tags,
        excluding every tag whose key starts with ``"mlflow."``).
    """
    client = mlflow.tracking.MlflowClient()
    data = client.get_run(run_id).data
    # Drop tags whose key starts with "mlflow."; keep everything else.
    tags = {k: v for k, v in data.tags.items() if not k.startswith("mlflow.")}
    # NOTE(review): the "metrices" key is misspelled; it is kept unchanged so
    # existing readers of this dict keep working.
    return {"params": data.params, "metrices": data.metrics, "tags": tags}

# Carry the run's logged params, metrics and tags along with the pipeline
# by passing them in as its variables.
run_variables = fetch_logged_data(run.info.run_id)
pipeline = Pipeline.from_vaex(train, variables=run_variables)



In [5]:
# Save the pipeline to 'mlflow_test' with the mlflow option enabled; the call's
# return value is shown as the cell output.
pipeline.save('mlflow_test', mlflow=True)

'mlflow_test'

In [13]:
# Sanity check: load the saved 'mlflow_test' output through MLflow's pyfunc
# loader and predict on the test split (converted to a pandas DataFrame).
test_pandas = test.to_pandas_df()
loaded_model = mlflow.pyfunc.load_model('mlflow_test')
loaded_model.predict(test_pandas)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0])

In [7]:
# Serve the saved 'mlflow_test' model from the shell. Per the captured log,
# this runs `mlflow models serve ... --no-conda`, which starts a gunicorn
# server on 127.0.0.1:5000 and keeps running until it is interrupted.
!glx serve mlflow_test --no-conda

Running docker build as follow:
mlflow models serve -m /Users/yonatanalexander/development/xdss/goldilox/mlflow_test --no-conda
 
2022/02/07 18:18:12 INFO mlflow.models.cli: Selected backend for flavor 'python_function'
2022/02/07 18:18:12 INFO mlflow.pyfunc.backend: === Running command 'gunicorn --timeout=60 -b 127.0.0.1:5000 -w 1 ${GUNICORN_CMD_ARGS} -- mlflow.pyfunc.scoring_server.wsgi:app'
[2022-02-07 18:18:12 +0100] [80481] [INFO] Starting gunicorn 20.1.0
[2022-02-07 18:18:12 +0100] [80481] [INFO] Listening at: http://127.0.0.1:5000 (80481)
[2022-02-07 18:18:12 +0100] [80481] [INFO] Using worker: sync
[2022-02-07 18:18:12 +0100] [80485] [INFO] Booting worker with pid: 80485
^C
[2022-02-07 18:18:36 +0100] [80481] [INFO] Handling signal: int
[2022-02-07 18:18:36 +0100] [80485] [INFO] Worker exiting (pid: 80485)
