In [114]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
import recourse as rs

# import data
url = 'https://raw.githubusercontent.com/ustunb/actionable-recourse/master/examples/paper/data/credit_processed.csv'
df = pd.read_csv(url)
y, X = df.iloc[:, 0], df.iloc[:, 1:]

# train a classifier
clf = LogisticRegression(max_iter = 1000)
clf.fit(X, y)
yhat = clf.predict(X)

# customize the set of actions
A = rs.ActionSet(X)  ## matrix of features. ActionSet will set bounds and step sizes by default

# specify immutable variables
A['Married'].mutable = False

# can only specify properties for multiple variables using a list
A['Age_lt_25'].mutable = False
A['Age_in_25_to_40'].mutable = False
A['Age_in_40_to_59'].mutable = False
A['Age_geq_60'].mutable = False


# education level
A['EducationLevel'].step_direction = 1  ## force conditional immutability.
A['EducationLevel'].step_size = 1  ## set step-size to a custom value.
A['EducationLevel'].step_type = "absolute"  ## force conditional immutability.
A['EducationLevel'].bounds = (0, 3)

A['TotalMonthsOverdue'].step_size = 1  ## set step-size to a custom value.
A['TotalMonthsOverdue'].step_type = "absolute"  ## discretize on absolute values of feature rather than percentile values
A['TotalMonthsOverdue'].bounds = (0, 100)  ## set bounds to a custom value.

## get model coefficients and align
A.set_alignment(clf)  ## tells `ActionSet` which directions  each feature should move in to produce positive change.

# Get one individual
i = np.flatnonzero(yhat <= 0).astype(int)[0]

# build a flipset for one individual
fs = rs.Flipset(x = X.values[i], action_set = A, clf = clf)
fs.populate(enumeration_type = 'distinct_subsets', total_items = 10)
fs.to_latex()
fs.to_html()

# Run Recourse Audit on Training Data
auditor = rs.RecourseAuditor(A, coefficients = clf.coef_, intercept = clf.intercept_)
audit_df = auditor.audit(X)  ## matrix of features over which we will perform the audit.

## print mean feasibility and cost of recourse
print(audit_df['feasible'].mean())
print(audit_df['cost'].mean())
# print_recourse_audit_report(X, audit_df, y)
# or produce additional information of cost of recourse by other variables
# print_recourse_audit_report(X, audit_df, y, group_by = ['y', 'Married', 'EducationLevel'])


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


obtained 10 items in 0.2 seconds


  self._df = pd.concat([self._df, dfx2], ignore_index=True)
100%|██████████| 2966/2966 [01:17<00:00, 38.40it/s]

1.0
0.04361582093269146





In [115]:
# Identify rejected individuals (those predicted as 0)
rejected_indices = np.flatnonzero(yhat == 0).astype(int)

# Create a subset of the data containing only the rejected individuals
rejected_X = X.iloc[rejected_indices]

# Run Recourse Audit only on the rejected individuals
audit_df_rejected = auditor.audit(rejected_X)

# Print mean feasibility and cost for rejected individuals only
print("Mean Feasibility for Rejected Individuals:", audit_df_rejected['feasible'].mean())
print("Mean Cost for Rejected Individuals:", audit_df_rejected['cost'].mean())


  0%|          | 0/2966 [00:00<?, ?it/s]

100%|██████████| 2966/2966 [01:14<00:00, 39.77it/s]

Mean Feasibility for Rejected Individuals: 1.0
Mean Cost for Rejected Individuals: 0.04361582093269146





In [2]:
import gurobipy as gp
from gurobipy import GRB
import random
import math
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import recourse as rs

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
url = 'https://raw.githubusercontent.com/ustunb/actionable-recourse/master/examples/paper/data/credit_processed.csv'
df = pd.read_csv(url)
y, X = df.iloc[:, 0], df.iloc[:, 1:]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [4]:
def create_bank_models(m):
    banks = {}
    
    # Create m non-overlapping splits of equal size
    # Shuffle data first to ensure random distribution
    shuffled_indices = np.random.permutation(len(X_train))
    split_size = len(X_train) // m  # Integer division
    
    X_splits = []
    y_splits = []
    
    # Create non-overlapping splits
    for j in range(m):
        if j < m-1:
            # For all banks except the last one
            start_idx = j * split_size
            end_idx = (j+1) * split_size
        else:
            # For the last bank, include any remaining data
            start_idx = j * split_size
            end_idx = len(X_train)
            
        # Get the indices for this split
        split_indices = shuffled_indices[start_idx:end_idx]
        
        # Extract the corresponding data
        X_split = X_train.iloc[split_indices]
        y_split = y_train.iloc[split_indices]
        
        X_splits.append(X_split)
        y_splits.append(y_split)
    
    # Create different models with different training data
    for j in range(m):
        if j == 0:
            # Bank 0: L1 penalty
            clf = LogisticRegression(penalty='l1', C=0.5, solver='liblinear', 
                                    max_iter=10000, random_state=42)
        elif j == 1:
            # Bank 1: L2 penalty, medium regularization
            clf = LogisticRegression(penalty='l2', C=1.0, max_iter=10000, random_state=42)
        elif j == 2:
            # Bank 2: L2 penalty, less regularization
            clf = LogisticRegression(penalty='l2', C=2.0, max_iter=10000, random_state=42)
        elif j == 3:
            # Bank 3: Standard parameters but will use custom threshold
            clf = LogisticRegression(penalty='l2', C=1.0, max_iter=10000, random_state=42)
        elif j == 4:
            # Bank 4: With class weights
            clf = LogisticRegression(penalty='l2', class_weight={0: 1, 1: 2}, 
                                    max_iter=10000, random_state=42)
        else:
            # Default model for any additional banks
            clf = LogisticRegression(max_iter=10000, random_state=j*10)
            
        # Fit the model with this bank's data split
        clf.fit(X_splits[j], y_splits[j])
        
        # Store model information
        bank_info = {'model': clf, 'name': f'Bank{j}-Model'}
        
        # Add threshold for Bank 3
        if j == 3:
            bank_info['threshold'] = 0.7
            
        banks[j] = bank_info
    
    return banks

In [5]:
# Create bank models
m = 5  # number of banks
banks = create_bank_models(m)

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [6]:
# Generate predictions for all test data
bank_predictions = {}
for j in range(m):
    bank_model = banks[j]['model']
    bank_predictions[j] = bank_model.predict(X_test)

# Filter test data to individuals who are rejected by all banks (prediction == 0)
rejected_indices = []
for i in range(len(X_test)):
    rejected_by_all = True
    for j in range(m):
        if bank_predictions[j][i] == 1:  # If approved by any bank
            rejected_by_all = False
            break
    
    if rejected_by_all:
        rejected_indices.append(i)
        
# Get subset of rejected individuals
X_rejected = X_test.iloc[rejected_indices].values
actual_rejected_indices = X_test.index[rejected_indices]
n = len(X_rejected)  # number of seekers

print(f"Found {n} individuals rejected by all {m} banks")

Found 141 individuals rejected by all 5 banks


In [None]:
# Calculate recourse costs for each seeker-bank pair
costs = {}
weights = {}
beta = 0.1  # scaling parameter for the exponential transformation
seeker_features = {}
recourse_actions = {}

seeker_features = X_test.iloc[rejected_indices].reset_index(drop=True)

Unnamed: 0,Married,Single,Age_lt_25,Age_in_25_to_40,Age_in_40_to_59,...,MostRecentBillAmount,MostRecentPaymentAmount,TotalOverdueCounts,TotalMonthsOverdue,HistoryOfOverduePayments
0,1,0,0,1,0,...,70,0,1,28,1
1,1,0,1,0,0,...,860,50,1,12,1
2,1,0,0,0,1,...,810,50,1,12,1
3,1,0,0,0,1,...,600,0,1,13,1
4,1,0,0,1,0,...,210,40,1,16,1
...,...,...,...,...,...,...,...,...,...,...,...
136,1,0,0,0,1,...,2980,140,1,16,1
137,0,1,0,1,0,...,30,0,1,16,1
138,0,1,0,0,1,...,2270,0,1,14,1
139,1,0,0,0,1,...,70,0,1,28,1


In [14]:

for j in range(m):
    bank_model = banks[j]['model']
    
    # Create and align a new action set for each bank model
    A_bank = rs.ActionSet(X_train)
    A_bank['Married'].mutable = False
    A_bank['Age_lt_25'].mutable = False
    A_bank['Age_in_25_to_40'].mutable = False
    A_bank['Age_in_40_to_59'].mutable = False
    A_bank['Age_geq_60'].mutable = False
    A_bank['EducationLevel'].step_direction = 1
    A_bank['EducationLevel'].step_size = 1
    A_bank['EducationLevel'].step_type = "absolute"
    A_bank['EducationLevel'].bounds = (0, 3)
    A_bank['TotalMonthsOverdue'].step_size = 1
    A_bank['TotalMonthsOverdue'].step_type = "absolute"
    A_bank['TotalMonthsOverdue'].bounds = (0, 100)
    
    # Align action set with current bank model
    A_bank.set_alignment(bank_model)
    
    # Calculate recourse using RecourseAuditor
    auditor = rs.RecourseAuditor(
        action_set=A_bank, 
        coefficients=bank_model.coef_[0], 
        intercept=bank_model.intercept_[0]
    )
    
    recourse_result = auditor.audit(seeker_features)
    
    for index, r in recourse_result.iterrows():
        if r['feasible']:
            costs[index, j] = r['cost']
        else:
            costs[index, j] = 10000  # Assign a high cost for infeasible recourse
            
        weights[index, j] = math.exp(-beta * costs[index, j])

100%|██████████| 140/140 [00:03<00:00, 35.06it/s]
100%|██████████| 140/140 [00:04<00:00, 34.30it/s]
100%|██████████| 140/140 [00:03<00:00, 40.06it/s]
100%|██████████| 140/140 [00:03<00:00, 37.69it/s]
100%|██████████| 140/140 [00:03<00:00, 36.84it/s]


In [24]:
gamma = 0.05  # scaling parameter for the exponential transformation cost
alpha = 0.5  # weight for the capacity cost in the objective function
n = len(seeker_features)
### Gurobi Model Setup
model = gp.Model("LoanAssignment_Recourse_MultiDim")

# Create binary decision variables z[i,j] indicating if seeker i is assigned to bank j.
z = {}
for i in range(n):
    for j in range(m):
        z[i, j] = model.addVar(vtype=GRB.BINARY, name=f"z_{i}_{j}")

# Create integer decision variables for capacities
c = {}
for j in range(m):
    c[j] = model.addVar(vtype=GRB.INTEGER, name=f"capacity_{j}")
 
# Create auxiliary variables for the exponent input and the exponential result.    
expArg = {}
aux_exp = {}
for j in range(m):
    # Auxiliary variable for gamma * c[j]
    expArg[j] = model.addVar(vtype=GRB.CONTINUOUS, name=f"expArg_{j}")
    # Auxiliary variable for the exponential value
    aux_exp[j] = model.addVar(vtype=GRB.CONTINUOUS, name=f"aux_exp_{j}")
    # Link expArg[j] with gamma * c[j]
    model.addConstr(expArg[j] == gamma * c[j], name=f"expArgConstr_{j}")
    # Add the exponential general constraint: aux_exp[j] = exp(expArg[j])
    model.addGenConstrExp(expArg[j], aux_exp[j], name=f"expConstr_{j}")

# Set the objective: maximize total weight of the assignments minus the capacity cost.
model.setObjective(
    gp.quicksum(weights[i, j] * z[i, j] for i in range(n) for j in range(m)) - 
    alpha*gp.quicksum(aux_exp[j] for j in range(m)),
    GRB.MAXIMIZE
)

# Constraints:
# 1. Each seeker is assigned to at most one bank.
for i in range(n):
    model.addConstr(gp.quicksum(z[i, j] for j in range(m)) <= 1, name=f"seeker_{i}")

# 2. Each bank's assignments do not exceed its capacity (using decision variable c[j]).
for j in range(m):
    model.addConstr(gp.quicksum(z[i, j] for i in range(n)) <= c[j], name=f"bank_{j}")
    
obj_progress = []

# Define the callback function to track the objective progress
def my_callback(model, where):
    if where == gp.GRB.Callback.MIP:
        # Get the objective bound at the current node
        obj_value = model.cbGet(gp.GRB.Callback.MIP_OBJBND)
        obj_progress.append(obj_value)

# Set the callback
model.optimize(my_callback)


### Output the Solution and Recourse Recommendations
if model.status == GRB.OPTIMAL:
    print("\nOptimal assignment and recommended recourse actions:")
    for i in range(n):
        for j in range(m):
            if z[i, j].X > 0.5:
                action = costs[i, j]  # minimal change required
                print(f"Seeker {i} assigned to Bank {j} ")
                print(f"  Recourse cost (minimal change required): {action:.2f}")
    for j in range(m):
        print(f"Capacity for Bank {j}: {c[j].X}")
else:
    print("No optimal solution found.")


Gurobi Optimizer version 12.0.1 build v12.0.1rc0 (win64 - Windows 11.0 (26100.2))

CPU model: Intel(R) Core(TM) i7-8565U CPU @ 1.80GHz, instruction set [SSE2|AVX|AVX2]
Thread count: 4 physical cores, 8 logical processors, using up to 8 threads

Optimize a model with 151 rows, 720 columns and 1425 nonzeros
Model fingerprint: 0x19ccc935
Model has 5 function constraints treated as nonlinear
  5 EXP
Variable types: 10 continuous, 710 integer (705 binary)
Coefficient statistics:
  Matrix range     [5e-02, 1e+00]
  Objective range  [5e-01, 1e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 1e+00]
Presolve time: 0.00s
Presolved: 176 rows, 721 columns, 1480 nonzeros
Presolved model has 5 nonlinear constraint(s)
         in nonlinear terms.
         Presolve was not able to compute smaller bounds for these variables.
         Consider bounding these variables or reformulating the model.


Solving non-convex MINLP

Variable types: 11 continuous, 710 integer (705 binary)
Found he