# Writing models in Python

In [1]:
import os
import sys
from pathlib import Path

home = str(Path.home())
d = os.path.join(home, "Sync/code/rh/trusty/python-trustyai")
sys.path.insert(0, d)

classpath = [
    "../dep/org/kie/kogito/explainability-core/1.5.0.Final/*",
    "../dep/org/slf4j/slf4j-api/1.7.30/slf4j-api-1.7.30.jar",
    "../dep/org/apache/commons/commons-lang3/3.8.1/commons-lang3-3.8.1.jar",
]

In [2]:
import trustyai

trustyai.init(classpath)

In [3]:
import pandas as pd

input_file = os.path.join(d, "docs/data/train.csv")
df = pd.read_csv(input_file)

# drop NAs
df = df.dropna()

# remove non-numeric rows from 'Age'
df = df[pd.to_numeric(df["Age"], errors="coerce").notnull()]

# filter out outlier incomes
filtered = df[df["Income"] < 370]

inputs = filtered[["Age", "Debt", "YearsEmployed", "Income"]]

# convert 'Age' to a float field
inputs["Age"] = inputs["Age"].astype("float64")

outputs = filtered[["Approved"]]
outputs = outputs.replace({"-": 0, "+": 1}).astype("int8")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  inputs["Age"] = inputs["Age"].astype("float64")


In [4]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

X_train, X_test, y_train, y_test = train_test_split(
    inputs, outputs, test_size=0.4, random_state=23
)

model = RandomForestClassifier(
    max_depth=8,
    max_leaf_nodes=64,
    max_samples=0.5,
    n_estimators=10,
    verbose=True,
    n_jobs=1,
)

model = model.fit(X_train, y_train)

  model = model.fit(X_train, y_train)
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.0s finished


In [5]:
import numpy as np

model.predict(np.array([[34.75, 15.0, 5.375, 134]]))

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.0s finished


array([1], dtype=int8)

In [6]:
from trustyai.model import (
    PredictionInput,
    Output,
    PredictionOutput,
    Type,
    Value,
)
from java.util import Random, ArrayList, List
from trustyai.utils import toJList


def creditApproval(inputs):
    predictionOutputs = ArrayList()
    features = inputs[0].getFeatures()  # single element
    values = [features.get(i).getValue().asNumber() for i in range(features.size())]
    prediction = model.predict([values])
    output = [Output(f"approved", Type.NUMBER, Value(prediction[0]), 1.0)]
    predictionOutput = PredictionOutput(toJList(output))
    predictionOutputs.add(predictionOutput)
    return predictionOutputs

In [7]:
from trustyai.model import FeatureFactory

def createInput(age, debt, year, income):
    features = ArrayList()
    features.add(FeatureFactory.newNumericalFeature("Age", age))
    features.add(FeatureFactory.newNumericalFeature("Debt", debt))
    features.add(FeatureFactory.newNumericalFeature("YearsEmployed", year))
    features.add(FeatureFactory.newNumericalFeature("Income", income))
    return PredictionInput(features)

inputs = createInput(34.75, 15.0, 5.375, 134.0)
x = creditApproval([inputs])
print(x.get(0).getOutputs().get(0).toString())

Output{value=1, type=number, score=1.0, name='approved'}


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.0s finished


In [14]:
from trustyai.local.lime import LimeConfig, LimeExplainer
from java.util import Random
from trustyai.model import PerturbationContext

jrandom = Random()

DEFAULT_NO_OF_PERTURBATIONS = 10

limeConfig = (
    LimeConfig()
    .withPerturbationContext(PerturbationContext(jrandom, DEFAULT_NO_OF_PERTURBATIONS))
    .withSamples(10)
)
limeExplainer = LimeExplainer(limeConfig)

In [15]:
from trustyai.model import PredictionProvider

provider = PredictionProvider(creditApproval)

In [16]:
from trustyai.model import Prediction

outputs = provider.predictAsync([inputs]).get().get(0)
prediction = Prediction(inputs, outputs)

saliencyMap = limeExplainer.explainAsync(prediction, provider).get()

print(saliencyMap)

{approved=Saliency{output=Output{value=1, type=number, score=1.0, name='approved'}, perFeatureImportance=[FeatureImportance{feature=Feature{name='Age', type=number, value=34.75}, score=0.07716557457196652}, FeatureImportance{feature=Feature{name='Debt', type=number, value=15.0}, score=0.07716557457196652}, FeatureImportance{feature=Feature{name='YearsEmployed', type=number, value=5.375}, score=0.07716557457196652}, FeatureImportance{feature=Feature{name='Income', type=number, value=134.0}, score=0.07716557457196652}]}}


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.0s finished
