In [28]:
from dataset.compas import CompasDataset
from dataset import dataset_loader
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim

In [29]:
%reload_ext autoreload
%autoreload 2

In [30]:
name = 'compas'
dataset_ares = dataset_loader(name, data_path='rawdata/', dropped_features=[], n_bins=None)

dataset = CompasDataset(dataset_ares=dataset_ares)
df = dataset.get_dataframe()

### 1. standardization

In [31]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [32]:
scaler = StandardScaler()

# Assume df is your DataFrame and 'target' is your target column
X = df.drop(columns=['Status'])
Y = df['Status']

# Step 1: Split the Data
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

X_train_scaler = scaler.fit_transform(X_train)
X_test_scaler = scaler.transform(X_test)

print(f'X_train_scaler: {X_train_scaler.shape}')
print(f'X_test_scaler: {X_test_scaler.shape}')

X_train_scaler: (4937, 15)
X_test_scaler: (1235, 15)


In [33]:
y_train = y_train.to_numpy()
y_test = y_test.to_numpy()

In [34]:
from models import PyTorchDNN, PyTorchLinearSVM, PyTorchRBFNet,PyTorchLogisticRegression,PyTorchLinearSVM

In [37]:
# 获取输入维度
input_dim = X_train_scaler.shape[1]

# 初始化 PyTorchDNN 模型
model = PyTorchDNN(input_dim=input_dim, hidden_dim=12)

# 定义损失函数和优化器
criterion = nn.BCEWithLogitsLoss()  # 二分类任务中的损失函数
optimizer = optim.Adam(model.parameters(), lr=0.01)  # 使用Adam优化器

# 数据转换为 PyTorch tensor 格式
X_train_tensor = torch.FloatTensor(X_train_scaler)
y_train_tensor = torch.FloatTensor(y_train).view(-1, 1)  # 确保y是二维



# 训练模型
epochs = 400  # 训练次数
for epoch in range(epochs):
    model.train()  # 进入训练模式
    # 前向传播
    outputs = model(X_train_tensor)
    # 计算损失
    loss = criterion(outputs, y_train_tensor)
    # 梯度清零
    optimizer.zero_grad()
    # 反向传播
    loss.backward()
    # 优化更新参数
    optimizer.step()
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

# 保存训练好的模型
torch.save(model.state_dict(), 'dnn_model.pth')

model_pytdnn = model

Epoch [10/400], Loss: 0.6908
Epoch [20/400], Loss: 0.6609
Epoch [30/400], Loss: 0.6484
Epoch [40/400], Loss: 0.6406
Epoch [50/400], Loss: 0.6372
Epoch [60/400], Loss: 0.6359
Epoch [70/400], Loss: 0.6352
Epoch [80/400], Loss: 0.6345
Epoch [90/400], Loss: 0.6341
Epoch [100/400], Loss: 0.6338
Epoch [110/400], Loss: 0.6335
Epoch [120/400], Loss: 0.6334
Epoch [130/400], Loss: 0.6332
Epoch [140/400], Loss: 0.6331
Epoch [150/400], Loss: 0.6330
Epoch [160/400], Loss: 0.6328
Epoch [170/400], Loss: 0.6327
Epoch [180/400], Loss: 0.6326
Epoch [190/400], Loss: 0.6325
Epoch [200/400], Loss: 0.6324
Epoch [210/400], Loss: 0.6323
Epoch [220/400], Loss: 0.6322
Epoch [230/400], Loss: 0.6321
Epoch [240/400], Loss: 0.6320
Epoch [250/400], Loss: 0.6319
Epoch [260/400], Loss: 0.6318
Epoch [270/400], Loss: 0.6317
Epoch [280/400], Loss: 0.6317
Epoch [290/400], Loss: 0.6316
Epoch [300/400], Loss: 0.6316
Epoch [310/400], Loss: 0.6315
Epoch [320/400], Loss: 0.6315
Epoch [330/400], Loss: 0.6314
Epoch [340/400], Lo

In [38]:
# 将测试数据转换为 PyTorch tensor
X_test_tensor = torch.FloatTensor(X_test_scaler)
y_test_tensor = torch.FloatTensor(y_test).view(-1, 1)  # 确保y是二维


In [39]:
# 进入评估模式
model.eval()

# 禁用梯度计算，进行推理
with torch.no_grad():
    outputs = model(X_test_tensor)  # 前向传播
    test_loss = criterion(outputs, y_test_tensor)  # 计算测试集上的损失

# 打印测试集的损失
print(f'Test Loss: {test_loss.item():.4f}')

Test Loss: 0.6494


### 2. model, data

In [40]:
from xai_cola import data,ml_model,counterfactual_explainer,cola,counterfactual_refiner
from xai_cola.cola_explainer import COunterfactualwithLimitedActions

In [41]:
# initialize the dataset
compas_dice = data.NumpyData(data=X_test_scaler, target_name="Status", feature_names=None)

# initialize the model
ml_model = ml_model.Model(model=model_pytdnn, backend="PYT")

In [42]:
compas_dice.get_x()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
0,-0.482399,0.482399,-0.521627,0.859291,-0.516911,0.963204,-0.075524,-0.708181,-0.300544,-0.040287,-0.24613,-1.352489,1.352489,2.055454,-0.161851
1,-0.482399,0.482399,-0.521627,0.859291,-0.516911,-1.038201,-0.075524,-0.708181,3.327297,-0.040287,-0.24613,-1.352489,1.352489,-0.481384,-0.292683
2,-0.482399,0.482399,-0.521627,0.859291,-0.516911,0.963204,-0.075524,-0.708181,-0.300544,-0.040287,-0.24613,0.739378,-0.739378,-0.269981,-0.314488
3,-0.482399,0.482399,1.917078,-1.163751,-0.516911,-1.038201,-0.075524,-0.708181,3.327297,-0.040287,-0.24613,-1.352489,1.352489,-0.692787,-0.314488
4,2.072971,-2.072971,1.917078,-1.163751,-0.516911,-1.038201,-0.075524,1.412068,-0.300544,-0.040287,-0.24613,0.739378,-0.739378,-0.481384,-0.292683
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1230,-0.482399,0.482399,-0.521627,-1.163751,1.934568,-1.038201,-0.075524,1.412068,-0.300544,-0.040287,-0.24613,-1.352489,1.352489,-0.692787,0.601337
1231,-0.482399,0.482399,-0.521627,0.859291,-0.516911,0.963204,-0.075524,-0.708181,-0.300544,-0.040287,-0.24613,0.739378,-0.739378,1.209841,-0.292683
1232,-0.482399,0.482399,1.917078,-1.163751,-0.516911,-1.038201,-0.075524,1.412068,-0.300544,-0.040287,-0.24613,0.739378,-0.739378,0.575631,-0.314488
1233,-0.482399,0.482399,-0.521627,0.859291,-0.516911,0.963204,-0.075524,-0.708181,-0.300544,-0.040287,-0.24613,0.739378,-0.739378,-0.692787,-0.292683


In [43]:
b = model(X_test_tensor)
b

tensor([[5.9543e-15],
        [9.9994e-01],
        [2.2517e-02],
        ...,
        [5.9606e-07],
        [9.9675e-01],
        [1.5533e-13]], grad_fn=<SigmoidBackward0>)

In [44]:
a = ml_model.predict(X_test_tensor)
a

array([0., 1., 0., ..., 0., 1., 0.], dtype=float32)

In [45]:
# when ml_model is pytorch model, we should use data in tensor type

counterfactual_explainer1 = counterfactual_explainer.DiCE(
    ml_model=ml_model, 
    data=compas_dice, 
    sample_num=3
    )  #will add more input params according to the dice_ml algorithm 

x_factual_ext:             0         1         2         3         4         5         6   \
0    -0.482399  0.482399 -0.521627  0.859291 -0.516911  0.963204 -0.075524   
1    -0.482399  0.482399 -0.521627  0.859291 -0.516911 -1.038201 -0.075524   
2    -0.482399  0.482399 -0.521627  0.859291 -0.516911  0.963204 -0.075524   
3    -0.482399  0.482399  1.917078 -1.163751 -0.516911 -1.038201 -0.075524   
4     2.072971 -2.072971  1.917078 -1.163751 -0.516911 -1.038201 -0.075524   
...        ...       ...       ...       ...       ...       ...       ...   
1230 -0.482399  0.482399 -0.521627 -1.163751  1.934568 -1.038201 -0.075524   
1231 -0.482399  0.482399 -0.521627  0.859291 -0.516911  0.963204 -0.075524   
1232 -0.482399  0.482399  1.917078 -1.163751 -0.516911 -1.038201 -0.075524   
1233 -0.482399  0.482399 -0.521627  0.859291 -0.516911  0.963204 -0.075524   
1234 -0.482399  0.482399  1.917078 -1.163751 -0.516911  0.963204 -0.075524   

            7         8         9        10     

100%|██████████| 3/3 [00:25<00:00,  8.40s/it]


In [52]:
factual = counterfactual_explainer1.factual
counterfactual = counterfactual_explainer1.counterfactual
factual_y = ml_model.predict(torch.FloatTensor(factual))
counterfactual_y = ml_model.predict(torch.FloatTensor(counterfactual))
print(f'factual: {factual}')
print(f'counterfactual: {counterfactual}')
print(f'factual_y: {factual_y}')
print(f'counterfactual_y: {counterfactual_y}')
# 使用scaler.inverse_transform将数据转换为原始数据
factual_inverse = scaler.inverse_transform(factual)
counterfactual_inverse = scaler.inverse_transform(counterfactual)
print(f'factual_inverse: {factual_inverse}')
print(f'counterfactual_inverse: {counterfactual_inverse}')


factual: [[ 2.07297137 -2.07297137 -0.52162709  0.85929057 -0.51691114 -1.03820131
  -0.07552357  1.41206789 -0.30054426 -0.04028706 -0.24613009 -1.35248895
   1.35248895 -0.4813843  -0.29268282]
 [ 2.07297137 -2.07297137 -0.52162709 -1.1637507   1.93456849 -1.03820131
  -0.07552357 -0.70818125  3.3272969  -0.04028706 -0.24613009 -1.35248895
   1.35248895 -0.69278746 -0.29268282]
 [-0.48239933  0.48239933 -0.52162709  0.85929057 -0.51691114  0.96320434
  -0.07552357 -0.70818125 -0.30054426 -0.04028706 -0.24613009  0.73937757
  -0.73937757 -0.69278746 -0.29268282]]
counterfactual: [[ 2.07297137 -2.07297137 -0.52162709  0.85929057 -0.51691114 -1.03820131
  -0.07552357  1.41206789 -0.30054426 -0.04028706 -0.24613009  0.10344984
   1.35248895 -0.4813843  -0.29268282]
 [ 2.07297137 -2.07297137 -0.52162709 -1.1637507   1.93456849 -1.03820131
  -0.07552357 -0.70818125  3.3272969  22.67216684 -0.24613009 -1.35248895
   1.35248895  6.67847319 -0.29268282]
 [ 0.28198518  0.48239933  1.66685679  

In [55]:
print(f'factual_inverse.shape: {factual_inverse.shape}')
print(f'counterfactual_inverse.shape: {counterfactual_inverse.shape}')

factual_inverse.shape: (3, 15)
counterfactual_inverse.shape: (3, 15)


In [53]:
pd.DataFrame(factual_inverse, columns=X.columns)

Unnamed: 0,Sex = Female,Sex = Male,Age_Cat = Less than 25,Age_Cat = 25 - 45,Age_Cat = Greater than 45,Race = African-American,Race = Asian,Race = Caucasian,Race = Hispanic,Race = Native American,Race = Other,C_Charge_Degree = F,C_Charge_Degree = M,Priors_Count,Time_Served
0,1.0,0.0,-2.775558e-17,1.0,0.0,0.0,0.0,1.0,0.0,2.168404e-19,0.0,0.0,1.0,1.0,1.0
1,1.0,0.0,-2.775558e-17,0.0,1.0,0.0,0.0,0.0,1.0,2.168404e-19,0.0,0.0,1.0,0.0,1.0
2,0.0,1.0,-2.775558e-17,1.0,0.0,1.0,0.0,0.0,0.0,2.168404e-19,0.0,1.0,0.0,0.0,1.0


In [54]:
pd.DataFrame(counterfactual_inverse, columns=X.columns)

Unnamed: 0,Sex = Female,Sex = Male,Age_Cat = Less than 25,Age_Cat = 25 - 45,Age_Cat = Greater than 45,Race = African-American,Race = Asian,Race = Caucasian,Race = Hispanic,Race = Native American,Race = Other,C_Charge_Degree = F,C_Charge_Degree = M,Priors_Count,Time_Served
0,1.0,0.0,-2.775558e-17,1.0,0.0,0.0,0.0,1.0,0.0,2.168404e-19,0.0,0.696,1.0,1.0,1.0
1,1.0,0.0,-2.775558e-17,0.0,1.0,0.0,0.0,0.0,1.0,0.9135353,0.0,0.0,1.0,34.868262,1.0
2,0.299129,1.0,0.8973957,1.0,0.0,1.0,0.0,0.0,0.0,2.168404e-19,0.0,1.0,0.0,0.0,1.0
