In [23]:
# loan_rl_agent.ipynb (or .py if script)

import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from d3rlpy.dataset import MDPDataset
from d3rlpy.algos import DiscreteBC, DiscreteBCConfig

# ------------------------
# 1. Load & Preprocess Data
# ------------------------
CSV_PATH = r"C:\Users\Relig\Downloads\LoanApproval-ML-RL\data\accepted_2007_to_2018.csv"
SAMPLE_N = 50000

df = pd.read_csv(CSV_PATH, low_memory=False)
df = df[df['loan_status'].isin(['Fully Paid', 'Charged Off'])].copy()
df['loan_status'] = df['loan_status'].map({'Fully Paid': 0, 'Charged Off': 1})

# clean numerics
df['loan_amnt'] = pd.to_numeric(df['loan_amnt'], errors='coerce')
df['int_rate'] = df['int_rate'].astype(str).str.rstrip('%').replace('nan', np.nan)
df['int_rate'] = pd.to_numeric(df['int_rate'], errors='coerce') / 100.0
df['annual_inc'] = pd.to_numeric(df['annual_inc'], errors='coerce')
df['dti'] = pd.to_numeric(df['dti'], errors='coerce')

df = df[['loan_amnt','int_rate','annual_inc','dti',
         'emp_length','home_ownership','purpose','loan_status']].dropna()

loan_amnt_orig = df['loan_amnt'].values.copy()
int_rate_orig  = df['int_rate'].values.copy()

features = ['loan_amnt','int_rate','annual_inc','dti',
            'emp_length','home_ownership','purpose']

for col in ['emp_length','home_ownership','purpose']:
    df[col] = LabelEncoder().fit_transform(df[col].astype(str))

scaler = StandardScaler()
df[features] = scaler.fit_transform(df[features])

if len(df) > SAMPLE_N:
    df = df.sample(n=SAMPLE_N, random_state=42).reset_index(drop=True)
    loan_amnt_orig = df['loan_amnt'].values
    int_rate_orig  = df['int_rate'].values

print("Final dataset shape:", df.shape)

# ------------------------
# 2. Build RL Dataset
# ------------------------
states = df[features].values.astype(np.float32)
actions = np.ones(len(df), dtype=np.int64)  # dataset logs approvals
rewards = np.where(df['loan_status'].values == 0,
                   loan_amnt_orig * int_rate_orig,
                   -loan_amnt_orig).astype(np.float32)
terminals = np.ones(len(df), dtype=bool)

rl_dataset = MDPDataset(states, actions, rewards, terminals)
print("\n✅ RL dataset created, size:", rl_dataset.size())

# ------------------------
# 3. Train RL Agent (DiscreteBC)
# ------------------------
bc_config = DiscreteBCConfig()
bc = DiscreteBC(config=bc_config, device="cpu", enable_ddp=False)

bc.build_with_dataset(rl_dataset)

bc.fit(
    dataset=rl_dataset,
    n_steps=10000
)

# ------------------------
# 4. Evaluate Policy Value
# ------------------------
pred_actions = bc.predict(states[:1000])

chosen_rewards = []
for s, a in zip(states[:1000], pred_actions):
    if a == 1:  # approve
        idx = np.random.randint(len(rewards))
        chosen_rewards.append(rewards[idx])
    else:       # deny
        chosen_rewards.append(0.0)

policy_value = np.mean(chosen_rewards)

print("\n🤖 RL Agent Results (DiscreteBC):")
print("Estimated Policy Value:", round(policy_value, 4))


Final dataset shape: (50000, 8)
2025-09-05 14:25.42 [info     ] Signatures have been automatically determined. action_signature=Signature(dtype=[dtype('int64')], shape=[(1,)]) observation_signature=Signature(dtype=[dtype('float32')], shape=[(7,)]) reward_signature=Signature(dtype=[dtype('float32')], shape=[(1,)])
2025-09-05 14:25.42 [info     ] Action-space has been automatically determined. action_space=<ActionSpace.DISCRETE: 2>
2025-09-05 14:25.43 [info     ] Action size has been automatically determined. action_size=2

✅ RL dataset created, size: 50000
2025-09-05 14:25.43 [info     ] dataset info                   dataset_info=DatasetInfo(observation_signature=Signature(dtype=[dtype('float32')], shape=[(7,)]), action_signature=Signature(dtype=[dtype('int64')], shape=[(1,)]), reward_signature=Signature(dtype=[dtype('float32')], shape=[(1,)]), action_space=<ActionSpace.DISCRETE: 2>, action_size=2)
2025-09-05 14:25.43 [info     ] Directory is created at d3rlpy_logs\DiscreteBC_202509051

Epoch 1/1: 100%|██████████| 10000/10000 [00:50<00:00, 199.92it/s, loss=0.748, imitation_loss=0.389, regularization_loss=0.359]


2025-09-05 14:26.33 [info     ] DiscreteBC_20250905142543: epoch=1 step=10000 epoch=1 metrics={'time_sample_batch': 0.001182298994064331, 'time_algorithm_update': 0.003563978672027588, 'loss': 0.7477409408986568, 'imitation_loss': 0.38867263781428335, 'regularization_loss': 0.3590683028638363, 'time_step': 0.00494197359085083} step=10000
2025-09-05 14:26.33 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteBC_20250905142543\model_10000.d3

🤖 RL Agent Results (DiscreteBC):
Estimated Policy Value: 0.1046
