# Assign Prediction Values and Probabilities to ValidMind Datasets

In [None]:
import validmind as vm

# Initialise the ValidMind client against the tracking endpoint below.
# The "..." values are placeholders: replace them with your own key,
# secret and project identifier before running this cell.
vm.init(
    api_host="http://localhost:3000/api/v1/tracking",
    api_key="...",
    api_secret="...",
    project="...",
)

In [None]:
import statsmodels.api as sm

%matplotlib inline

In [None]:
# Import the sample dataset from the library

from validmind.datasets.credit_risk import lending_club

# Load the Lending Club credit-risk sample, selecting the "offline" source.
df = lending_club.load_data(source="offline")

# Print a summary of the loaded data as a quick sanity check
# (presumably a pandas DataFrame — confirm against load_data).
df.info()

In [None]:
# Run the dataset module's own preprocessing step on the raw data,
# then inspect the result.
preprocess_df = lending_club.preprocess(df)
preprocess_df.info()

In [None]:
# Apply the dataset module's feature-engineering step to the preprocessed
# data, then inspect the result.
fe_df = lending_club.feature_engineering(preprocess_df)
fe_df.info()

In [None]:
# Split the engineered data into train and test sets. add_constant=True is
# forwarded to the library's split helper (presumably to add the intercept
# column the GLM needs — confirm in lending_club.split).
train_df, test_df = lending_club.split(fe_df, add_constant=True)

# Separate the target from the features for each split.
y_train = train_df[lending_club.target_column]
x_train = train_df.drop(columns=lending_club.target_column)
y_test = test_df[lending_club.target_column]
x_test = test_df.drop(columns=lending_club.target_column)

# Build a binomial-family GLM on the training split and fit it in one step.
model = sm.GLM(y_train, x_train, family=sm.families.Binomial()).fit()

# Last expression of the cell, so the notebook renders the fit summary.
model.summary()

In [None]:
# Wrap the train and test data as ValidMind datasets.
# input_id is the identifier given to each dataset; target_column names
# the label column taken from the lending_club module.
vm_train_ds = vm.init_dataset(
    dataset=train_df,
    input_id="train_dataset",
    target_column=lending_club.target_column,
)

vm_test_ds = vm.init_dataset(
    dataset=test_df, 
    input_id="test_dataset", 
    target_column=lending_club.target_column
)

In [None]:
# Wrap the fitted GLM as a ValidMind model so it can be passed to
# assign_predictions() and run_test() in the cells that follow.
vm_model = vm.init_model(
    model,
    input_id="glm_model",
)

## Options to assign prediction values and probabilities to VM datasets

### 1. Assign prediction values and probabilities computed outside VM

In [None]:
# Compute probabilities from the model outside ValidMind
train_probabilities = model.predict(x_train)
test_probabilities = model.predict(x_test)

# Compute binary predictions from the probabilities:
# values strictly above the cut-off become 1, all others 0.
cut_off_threshold = 0.5
train_binary_predictions = (train_probabilities > cut_off_threshold).astype(int)
test_binary_predictions = (test_probabilities > cut_off_threshold).astype(int)

# Compute scores from the probabilities
# NOTE(review): train_scores / test_scores are not used by any later cell
# visible in this notebook.
train_scores = lending_club.compute_scores(train_probabilities)
test_scores = lending_club.compute_scores(test_probabilities)

In [None]:
# Option 1: hand the externally computed predictions and probabilities
# directly to each ValidMind dataset, associated with vm_model.
vm_train_ds.assign_predictions(
    model=vm_model,
    prediction_values=train_binary_predictions,
    prediction_probabilities = train_probabilities,
)

vm_test_ds.assign_predictions(
    model=vm_model,
    prediction_values=test_binary_predictions,
    prediction_probabilities = test_probabilities,
)

In [None]:
# Print both dataset objects to inspect them after predictions were assigned
# (what is shown depends on the dataset's string representation).
print(vm_test_ds)
print(vm_train_ds)

In [None]:
# Set run_test to False to skip this test when re-running the notebook.
run_test = True
if run_test:
    # ROC curve test, run with the test dataset and the GLM as inputs.
    test = vm.tests.run_test(
        "validmind.model_validation.sklearn.ROCCurve",
        inputs={"dataset": vm_test_ds, "model": vm_model},
    )

In [None]:
# Set run_test to False to skip this test when re-running the notebook.
run_test = True
if run_test:
    # GINI table test; it receives both datasets (train first, then test).
    test = vm.tests.run_test(
        "validmind.model_validation.statsmodels.GINITable",
        inputs={"datasets": [vm_train_ds, vm_test_ds], "model": vm_model},
    )

In [None]:
# Set run_test to False to skip this test when re-running the notebook.
run_test = True
if run_test:
    # Classifier performance test, run with the training dataset as input.
    test = vm.tests.run_test(
        "validmind.model_validation.sklearn.ClassifierPerformance",
        inputs={"dataset": vm_train_ds, "model": vm_model},
    )

### 2. Assign prediction values and probabilities from datasets with existing prediction columns

In [None]:
# Copy of the training data with the GLM outputs stored as ordinary columns.
# DataFrame.assign returns a new frame, so train_df itself is left untouched.
train_df2 = train_df.assign(
    glm_prediction_values=train_binary_predictions,
    glm_prediction_probabilities=train_probabilities,
)
train_df2.head(5)

In [None]:
# Copy of the test data with the GLM outputs stored as ordinary columns.
# DataFrame.assign returns a new frame, so test_df itself is left untouched.
test_df2 = test_df.assign(
    glm_prediction_values=test_binary_predictions,
    glm_prediction_probabilities=test_probabilities,
)
test_df2.head(5)

In [None]:
# Re-create the ValidMind datasets from the copies that already carry the
# prediction columns, reusing the same input_ids as before.
vm_train_ds = vm.init_dataset(
    dataset=train_df2,
    input_id="train_dataset",
    target_column=lending_club.target_column,
)

vm_test_ds = vm.init_dataset(
    dataset=test_df2,
    input_id="test_dataset",
    target_column=lending_club.target_column,
)


In [None]:
# Option 2: instead of passing arrays, name the existing dataset columns
# that hold the predictions and probabilities for vm_model.
vm_train_ds.assign_predictions(
    model=vm_model, 
    prediction_column="glm_prediction_values",
    probability_column="glm_prediction_probabilities"
)

vm_test_ds.assign_predictions(
    model=vm_model, 
    prediction_column="glm_prediction_values",
    probability_column="glm_prediction_probabilities"
)

In [None]:
# Set run_test to False to skip this test when re-running the notebook.
run_test = True
if run_test:
    # ROC curve test, run with the test dataset and the GLM as inputs.
    test = vm.tests.run_test(
        "validmind.model_validation.sklearn.ROCCurve",
        inputs={"dataset": vm_test_ds, "model": vm_model},
    )

In [None]:
# Set run_test to False to skip this test when re-running the notebook.
run_test = True
if run_test:
    # GINI table test; it receives both datasets (train first, then test).
    test = vm.tests.run_test(
        "validmind.model_validation.statsmodels.GINITable",
        inputs={"datasets": [vm_train_ds, vm_test_ds], "model": vm_model},
    )

In [None]:
# Set run_test to False to skip this test when re-running the notebook.
run_test = True
if run_test:
    # Classifier performance test, run with the training dataset as input.
    test = vm.tests.run_test(
        "validmind.model_validation.sklearn.ClassifierPerformance",
        inputs={"dataset": vm_train_ds, "model": vm_model},
    )

### 3. Assign prediction values and probabilities computed automatically within VM

In [None]:
# Start again from the original train/test data (the frames without the
# added prediction columns), reusing the same input_ids as before.
vm_train_ds = vm.init_dataset(
    dataset=train_df,
    input_id="train_dataset",
    target_column=lending_club.target_column,
)

vm_test_ds = vm.init_dataset(
    dataset=test_df,
    input_id="test_dataset",
    target_column=lending_club.target_column,
)

In [None]:
# Option 3: pass only the model — no precomputed values and no column
# names — leaving ValidMind to derive the predictions from vm_model itself.
vm_train_ds.assign_predictions(model=vm_model)
vm_test_ds.assign_predictions(model=vm_model)

In [None]:
# Print both dataset objects to inspect them after predictions were assigned
# (what is shown depends on the dataset's string representation).
print(vm_train_ds)
print(vm_test_ds)

In [None]:
# Set run_test to False to skip this test when re-running the notebook.
run_test = True
if run_test:
    # ROC curve test, run with the test dataset and the GLM as inputs.
    test = vm.tests.run_test(
        "validmind.model_validation.sklearn.ROCCurve",
        inputs={"dataset": vm_test_ds, "model": vm_model},
    )

In [None]:
# Set run_test to False to skip this test when re-running the notebook.
run_test = True
if run_test:
    # GINI table test; it receives both datasets (train first, then test).
    test = vm.tests.run_test(
        "validmind.model_validation.statsmodels.GINITable",
        inputs={"datasets": [vm_train_ds, vm_test_ds], "model": vm_model},
    )

In [None]:
# Set run_test to False to skip this test when re-running the notebook.
run_test = True
if run_test:
    # Classifier performance test, run with the training dataset as input.
    test = vm.tests.run_test(
        "validmind.model_validation.sklearn.ClassifierPerformance",
        inputs={"dataset": vm_train_ds, "model": vm_model},
    )