In [1]:
from os import makedirs
from os import path
import json

In [2]:
# Scratch folder that every pipeline step in this notebook reads from and writes to.
tmp_data_folder = "./sample-data/output/"

# exist_ok=True keeps this cell idempotent on re-run and avoids the
# check-then-create race of a separate path.exists() test.
makedirs(tmp_data_folder, exist_ok=True)

# JSON is UTF-8 by specification; do not rely on the platform's default encoding.
with open("./ml_config.json", "r", encoding="utf-8") as read_file:
    config = json.load(read_file)

# Pipeline inputs (a missing key raises KeyError here, before any step runs)
pipeline_inputs = config["pipeline_inputs"]
data_input_uri = pipeline_inputs["input_data"]
feature_list = pipeline_inputs["feature_list"]
label_col = pipeline_inputs["label_col"]
learning_rate = pipeline_inputs["learning_rate"]
n_estimators = pipeline_inputs["n_estimators"]
test_size = pipeline_inputs["test_size"]
random_state = pipeline_inputs["random_state"]

In [3]:
# Load components (autoreload re-imports edited modules before each cell runs)
%load_ext autoreload
%autoreload 2

from components.prep import data_prep_func
from components.split_data import split_data_func
from components.train import train_func
from components.score import score_func
from components.eval import eval_func




In [4]:
# Run the pipeline components locally, in order. Every path argument below
# points at the same scratch folder (tmp_data_folder).

# Step 1: data prep.
# NOTE(review): the source path is hard-coded here although `data_input_uri`
# is loaded from the config above - confirm whether that is intentional.
data_prep_func(data_source="./sample-data/nyc-taxi-data.csv", data_cooked=tmp_data_folder)

# Step 2: split data.
split_data_func(
    data_cooked=tmp_data_folder,
    test_size=test_size,
    random_state=random_state,
    data_train=tmp_data_folder,
    data_test=tmp_data_folder,
)

# Step 3: train.
train_func(
    train_data=tmp_data_folder,
    feature_list=feature_list,
    label_col=label_col,
    learning_rate=learning_rate,
    n_estimators=n_estimators,
    model_output=tmp_data_folder,
)

# Step 4: score.
score_func(
    test_data=tmp_data_folder,
    model_input=tmp_data_folder,
    feature_list=feature_list,
    scored_data=tmp_data_folder,
)

# Step 5: evaluate.
eval_func(scored_data=tmp_data_folder, label_col=label_col, eval_result=tmp_data_folder)