In [7]:
import pandas as pd

# Load the data from each spreadsheet
cb = pd.read_excel("data/231013_Customer_Base.xlsx")
ti = pd.read_excel("data/231013_Transactions_Input.xlsx")
fc = pd.read_excel("data/231013_Fraud_Cases.xlsx")

In [8]:
cb.head()

Unnamed: 0,customer_id,home_bank,customer_prob
0,10298,bank_E,0.49
1,14507,bank_E,0.35
2,14558,bank_E,0.45
3,13453,bank_E,0.63
4,19629,bank_D,0.79


In [9]:
ti.head()

Unnamed: 0,transaction_id,description,Amount,category,date,month,customer_id,type,In_or_Out,bank_to,bank_from,transac_prob,description_prob,priority
0,1,Salary Deposit - XYZ Corporation,11474.33021,Income,2023-10-01,October,17699,income,paid_in,bank_A,bank_A,,,3
1,2,Online Retailer - Electronics Purchase,199.99,Online Shopping,2023-10-01,October,10298,spending,paid_out,bank_A,bank_E,0.78,0.25,3
2,3,Utilities Payment - Internet Service,60.0,Utilities,2023-10-01,October,10298,spending,paid_out,bank_C,bank_E,0.3,0.22,1
3,4,Rent Payment,1500.0,Housing,2023-10-01,October,14507,spending,paid_out,bank_D,bank_E,0.57,0.53,3
4,5,Paycheck Deposit - Company Name,10480.68449,Income,2023-10-01,October,14507,income,paid_in,bank_E,bank_D,,,3


In [10]:
fc.head()

Unnamed: 0,transaction_id,is_scam_transaction,fraud_type,case_id
0,187,1,AdvanceFee,5415844
1,247,1,Impersonation,4280670
2,818,1,Impersonation,6956462
3,891,1,Purchase,2702984
4,1142,1,Purchase,3493741


In [11]:
transactions = pd.merge(cb, ti, on='customer_id', how='inner')

In [12]:
transactions.head()

Unnamed: 0,customer_id,home_bank,customer_prob,transaction_id,description,Amount,category,date,month,type,In_or_Out,bank_to,bank_from,transac_prob,description_prob,priority
0,10298,bank_E,0.49,2,Online Retailer - Electronics Purchase,199.99,Online Shopping,2023-10-01,October,spending,paid_out,bank_A,bank_E,0.78,0.25,3
1,10298,bank_E,0.49,3,Utilities Payment - Internet Service,60.0,Utilities,2023-10-01,October,spending,paid_out,bank_C,bank_E,0.3,0.22,1
2,10298,bank_E,0.49,16,Paycheck Deposit - ABC Corp.,10427.6474,Income,2023-10-01,October,income,paid_in,bank_E,bank_E,,,3
3,10298,bank_E,0.49,33,Rent Payment,1200.0,Housing,2023-10-01,October,spending,paid_out,bank_E,bank_E,0.65,0.62,3
4,10298,bank_E,0.49,2869,Phone Bill Payment - Wireless Provider,70.0,Utilities,2023-10-02,October,spending,paid_out,bank_A,bank_E,0.48,0.34,1


In [14]:
transactions_out = transactions[transactions['In_or_Out'] == 'paid_in']

In [15]:
transactions_out.head()

Unnamed: 0,customer_id,home_bank,customer_prob,transaction_id,description,Amount,category,date,month,type,In_or_Out,bank_to,bank_from,transac_prob,description_prob,priority
2,10298,bank_E,0.49,16,Paycheck Deposit - ABC Corp.,10427.6474,Income,2023-10-01,October,income,paid_in,bank_E,bank_E,,,3
15,10298,bank_E,0.49,9312,Transfer from Savings to Current,300.0,Transfers,2023-10-08,October,income,paid_in,bank_E,bank_E,,,3
28,10298,bank_E,0.49,16449,Transfer from Savings to Current - Emergency Fund,500.0,Transfers,2023-10-16,October,income,paid_in,bank_E,bank_E,,,3
42,10298,bank_E,0.49,23738,Transfer from Savings to Current,250.0,Transfers,2023-10-23,October,income,paid_in,bank_E,bank_E,,,3
53,10298,bank_E,0.49,27243,Transfer from Savings to Current,250.0,Transfers,2023-10-27,October,income,paid_in,bank_E,bank_E,,,3


Write a function that takes in Parameters for the LP model and solves it.
Write a function that takes in the solvers solution and the real fraud cases for the day and provides meaningful metrics.
Write a function that updates the dynamic parameters based on the metrics.

In [2]:
# Step 1: Import PuLP
from pulp import LpProblem, LpMinimize, LpVariable, lpSum

# Assumed that the data from the dataframe is available in a suitable format
# For the purpose of this example, we will use dummy data structures

# Step 2: Define the data structures (dummy structures, replace with actual data)
transactions = [16, 9312, 16449, 23738, 27243]  # T
banks = ['A', 'B', 'C', 'D', 'E']  # Bk
priority_levels = [1, 2, 3, 4]  # P
fixed_costs = {1: 40, 2: 60, 3: 100, 4: 150}  # F_p
priority_multipliers = {1: 0.25, 2: 0.5, 3: 1, 4: 2}  # k_p
amounts = {16: 10427.6474, 9312: 300.0, 16449: 500.0, 23738: 250.0, 27243: 250.0}  # A_i
customer_prob = 0.49  # customer_prob_i
w1 = w2 = 0.5  # initial weights

# Step 3: Initialize the LP problem
lp_prob = LpProblem("Bank_Investigation_Problem", LpMinimize)

# Step 4: Add decision variables
x = LpVariable.dicts("x", ((i, j, p) for i in transactions for j in banks for p in priority_levels),
                     cat='Binary')
y = LpVariable.dicts("y", ((i, j, p) for i in transactions for j in banks for p in priority_levels),
                     cat='Binary')

# Step 5: Calculate Pr_i
Pr = {i: customer_prob * w1 + customer_prob * w2 for i in transactions}  # Modify as per actual formula

# Step 6: Construct the objective function
objective_function = lpSum([(1 - x[i, j, p]) * amounts[i] * priority_multipliers[p] * Pr[i] +
                            y[i, j, p] * fixed_costs[p] -
                            x[i, j, p] * amounts[i] * priority_multipliers[p] * Pr[i]
                            for i in transactions for j in banks for p in priority_levels])
lp_prob += objective_function

# Step 7: Add constraints

# Bank Capacity Constraints
for bank in banks:
    lp_prob += (lpSum([0.25 * x[i, bank, 1] + 0.5 * x[i, bank, 2] + x[i, bank, 3] + 2 * x[i, bank, 4]
                       for i in transactions]) <= 10, f"Capacity_Constraint_{bank}")

# Shared Resource Constraints (not applicable with the given data)

# External Investigator Constraints
for i in transactions:
    for j in banks:
        for p in priority_levels:
            lp_prob += (x[i, j, p] + y[i, j, p] <= 1, f"External_Investigator_Constraint_{i}_{j}_{p}")

# Step 8: Solve the LP problem
lp_prob.solve()

# The values of the decision variables can be printed out as follows
for v in lp_prob.variables():
    print(v.name, "=", v.varValue)

# The minimal cost is:
print("Total Expected Cost =", lp_prob.objective.value())


Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/pulp/solverdir/cbc/osx/64/cbc /var/folders/_g/3jdpr0d14q1g62hkp6vf5mlh0000gn/T/c59fe6ffe1024b51a5f3181d90d861cc-pulp.mps timeMode elapsed branch printingOptions all solution /var/folders/_g/3jdpr0d14q1g62hkp6vf5mlh0000gn/T/c59fe6ffe1024b51a5f3181d90d861cc-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 110 COLUMNS
At line 1011 RHS
At line 1117 BOUNDS
At line 1318 ENDATA
Problem MODEL has 105 rows, 200 columns and 300 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is -204471 - 0.00 seconds
Cgl0004I processed model has 5 rows, 80 columns (80 integer (60 of which binary)) and 80 elements
Cbc0038I Initial state - 5 integers unsatisfied sum - 1.75
Cbc0038I Pass   1: suminf.    0.37500 (2) obj. -201879 iterations 5
Cbc0