# A Simple Model

In [2]:
import os

import pandas as pd
from joblib import dump, load
from hamilton import driver, base

In [3]:
# parameters
# Run-level inputs for this notebook (presumably the cell overridden by a
# parameter-injection tool such as papermill — confirm). All paths are relative
# to the working directory the notebook is executed from.
model_file: str = "models/rf.joblib"  # joblib file holding the model loaded into `fit_clf`
infer_type: str = "test"  # label interpolated into the artifact and result file names
encoder_file: str = "intermediate_data/encoder.joblib"  # joblib file holding the encoders bundle
data: str = "data/test.csv"  # CSV file read into the input DataFrame

In [4]:
# Column names reused by every configuration dictionary in this notebook.
index_col, target_col = 'passengerid', "survived"

# Model-pipeline configuration. It is not consumed by the cells visible here;
# it is kept for parity with the training setup.
# NOTE(review): 't' looks like a decision threshold — confirm against model_pipeline.
config_model = dict(
    index_column=index_col,
    target_column=target_col,
    random_state=42,
    max_depth=None,
    validation_size_fraction=0.33,
    t=0.5,
)

In [5]:
import features_pipeline, model_pipeline

In [6]:
# Deserialize the model artifact at `model_file`; it is later passed to the
# Hamilton driver as the `clf` input (presumably an already-fitted classifier).
# NOTE(review): joblib.load unpickles its input, which can execute arbitrary
# code — only point `model_file` at trusted artifacts.
fit_clf = load(model_file)

In [10]:
# Read the CSV at `data` into a DataFrame; it is later passed to the Hamilton
# driver as the `df` input.
df = pd.read_csv(data)

In [8]:
# Deserialize the encoder bundle; the next cell reads its 'encoders' mapping.
# NOTE(review): joblib.load unpickles its input — only load trusted files.
out = load(encoder_file)

In [9]:
# Re-key the loaded encoders with every underscore removed from their names
# (presumably so the keys match the input names the pipelines expect — confirm).
inputencoders = {
    name.replace('_', ''): out['encoders'][name]
    for name in out['encoders']
}

## Inference 2: Inference starting from the raw input data

In [11]:
# Driver configuration for the second inference run: the shared column names
# plus 't' (presumably the decision threshold — confirm against the pipelines).
config_infer2 = dict(index_column=index_col, target_column=target_col, t=0.5)


In [13]:
# Runtime inputs for the graph: the frame read from CSV, the loaded model, and
# every encoder under its underscore-free name. dict(...) is kept on purpose so
# an encoder named 'df' or 'clf' raises instead of silently overriding.
dinfer2 = dict(df=df, clf=fit_clf, **inputencoders)

# The DictResult builder makes execute() return a dict keyed by output name.
infer2_adapter = base.SimplePythonGraphAdapter(base.DictResult())
infer2_dr = driver.Driver(
    config_infer2,
    features_pipeline,
    model_pipeline,
    adapter=infer2_adapter,
)

# Write the execution graph that leads to 'chain_predictions' under ./artifacts/.
infer2_dr.visualize_execution(
    ['chain_predictions'],
    f"./artifacts/infer2_{infer_type}.dot",
    {},
    inputs=dinfer2,
)

In [14]:
from IPython.display import Image, Markdown

In [15]:
# Embed the rendered execution graph for this `infer_type` as a markdown image.
# NOTE(review): the graph is written under ./artifacts/ by the cell above, while
# this link is relative to the notebook's own location — confirm the executed
# notebook is stored next to the PDF, otherwise the image will not resolve.
Markdown(f"![](./infer2_{infer_type}.dot.pdf)")

![](./infer2_test.dot.pdf)

In [16]:
# Execute the graph, requesting only the 'chain_predictions' output. The driver
# was built with a DictResult adapter, so the result is indexed by output name.
infer2dict = infer2_dr.execute(['chain_predictions'],inputs = dinfer2)

<class 'pandas.core.frame.DataFrame'>


In [17]:
# Show the predictions. In the recorded run this is a tuple of two arrays: one
# of floats and one of 0/1 integers (presumably scores and thresholded labels —
# confirm against model_pipeline).
infer2dict['chain_predictions']

(array([0.  , 0.53, 0.79, ..., 0.07, 0.92, 0.68]),
 array([0, 1, 1, ..., 0, 1, 1]))

In [19]:
# Persist the full result dict for this run. joblib.dump does not create
# missing parent directories, so ensure the target folder exists first;
# otherwise a fresh checkout without results/ fails with FileNotFoundError.
infer2_results_file = f"results/infer2_{infer_type}.joblib"
os.makedirs(os.path.dirname(infer2_results_file), exist_ok=True)
# Keep dump(...) as the last expression so the cell still displays the list of
# written file names.
dump(infer2dict, infer2_results_file)

['results/infer2_test.joblib']