## MMCS Project | Fraud Detection Model

In [62]:
# Import the necessary libraries
# Wildcard import goes first so none of its names can shadow the explicit imports below
from pulp import *

import random
from importlib import reload

import pandas as pd

from fraud_detection import limits
from fraud_detection import modelling

In [42]:
# Read the two parquet inputs (paths are relative to the notebook's working directory)
transactions = pd.read_parquet(path='data/transactions.parquet')
frauds = pd.read_parquet(path='data/frauds.parquet')

In [43]:
# Distinct dates in order of first appearance. Listing the raw column would give one
# entry per row, so slicing `days` (e.g. `days[:10]`) would not select distinct days.
# `days[0]` is unaffected: it is still the date of the first row.
days = list(transactions['date'].drop_duplicates())

# Set of flagged transaction ids, for constant-time membership checks
fraudulent_transactions = set(frauds['transaction_id'])

In [44]:
# Keep only the rows whose date equals the first entry of `days`
transactions_day1 = transactions[transactions['date'].eq(days[0])]

In [54]:
# Distinct category values present in the day-1 subset, in order of first appearance
categories = list(transactions_day1['category'].drop_duplicates())
categories

['Utilities',
 'Housing',
 'Online Shopping',
 'Shopping',
 'Transportation',
 'Dining Out',
 'Transfers',
 'Groceries',
 'Streaming Services',
 'Electronics',
 'Credit Card Payment',
 'Healthcare',
 'Bank Fees',
 'Charity',
 'Home Improvement',
 'Loan Payment',
 'Holiday',
 'Investment',
 'Entertainment']

In [61]:
# Dedicated, seeded generator: Restart & Run All reproduces the same draw,
# and the global `random` state used by other cells is left untouched.
SEED = 42
rng = random.Random(SEED)

# Randomly select categories
selected_categories = rng.sample(categories, 5)

# Draw one ratio in [0, 1] for each selected category
rand_ratios = [rng.uniform(0, 1) for _ in selected_categories]
rand_ratios

[0.28976274578362693,
 0.787940425911029,
 0.8781635386920272,
 0.7740676631079323,
 0.626518552377433]

In [51]:
# Re-import the `modelling` module so recent edits to it are picked up without a kernel restart
modelling = reload(modelling)

# Hand-written limits_category configuration used as a worked example
example_limits_category = {
    'Utilities': 0.3,
    'Housing': 0.1,
    'Shopping': 0.04,
}

# Daily budget used for the worked example
budget_daily_example = 10_000

# Loss of the example configuration on the day-1 transactions
loss_example = modelling.calculate_loss(
    example_limits_category,
    transactions_day1,
    budget_daily_example,
    fraudulent_transactions,
)
loss_example

Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /Users/uzaykaradag/.pyenv/versions/3.12.0/lib/python3.12/site-packages/pulp/solverdir/cbc/osx/64/cbc /var/folders/_g/3jdpr0d14q1g62hkp6vf5mlh0000gn/T/beaff0be0c954c0e97f624cbe33546b5-pulp.mps max timeMode elapsed branch printingOptions all solution /var/folders/_g/3jdpr0d14q1g62hkp6vf5mlh0000gn/T/beaff0be0c954c0e97f624cbe33546b5-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 3419 COLUMNS
At line 51168 RHS
At line 54583 BOUNDS
At line 66512 ENDATA
Problem MODEL has 3414 rows, 11928 columns and 23856 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 1418.75 - 0.01 seconds
Cgl0004I processed model has 1709 rows, 8538 columns (8538 integer (8538 of which binary)) and 17058 elements
Cutoff increment increased from 1e-05 to 0.04995
Cbc0038I Initial state - 0 integers unsatisfied sum - 0
Cbc0038I S

248.70000000000027

In [29]:
# First 10 *distinct* days, in order of first appearance. Deduplicating here keeps the
# slice correct even when `days` holds one entry per row rather than one per day.
days_test = list(dict.fromkeys(days))[:10]

In [55]:
import random

In [60]:
# Scratch check: a single draw from the [0, 1] range
random.uniform(0.0, 1.0)

0.6924521373733618

In [67]:
# Re-import the `limits` module so recent edits to it are picked up without a kernel restart
limits = reload(limits)

# The displayed result maps 5 categories to values between 0 and 1; presumably the
# arguments are (pool, count, lower bound, upper bound) — confirm against limits.generate
limits.generate(
    categories,
    5,
    0,
    1,
)

{'Holiday': 0.6509211729422265,
 'Housing': 0.4106833686327289,
 'Home Improvement': 0.5947283192516574,
 'Credit Card Payment': 0.062343210503846946,
 'Healthcare': 0.5004652437391637}

In [68]:
# Re-display the category list built from the day-1 transactions, for reference
categories

['Utilities',
 'Housing',
 'Online Shopping',
 'Shopping',
 'Transportation',
 'Dining Out',
 'Transfers',
 'Groceries',
 'Streaming Services',
 'Electronics',
 'Credit Card Payment',
 'Healthcare',
 'Bank Fees',
 'Charity',
 'Home Improvement',
 'Loan Payment',
 'Holiday',
 'Investment',
 'Entertainment']

In [69]:
from fraud_detection import tuning

In [70]:
# Search for a limits configuration on the day-1 data; the displayed result is a
# (limits dict, numeric value) pair. The trailing 10 is presumably the number of
# random candidates tried — confirm against tuning.find_minimizer_random
tuning.find_minimizer_random(
    transactions_day1,
    budget_daily_example,
    fraudulent_transactions,
    categories,
    10,
)

Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /Users/uzaykaradag/.pyenv/versions/3.12.0/lib/python3.12/site-packages/pulp/solverdir/cbc/osx/64/cbc /var/folders/_g/3jdpr0d14q1g62hkp6vf5mlh0000gn/T/7be223b1336448058d1cef1adabe5373-pulp.mps max timeMode elapsed branch printingOptions all solution /var/folders/_g/3jdpr0d14q1g62hkp6vf5mlh0000gn/T/7be223b1336448058d1cef1adabe5373-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 3419 COLUMNS
At line 51168 RHS
At line 54583 BOUNDS
At line 66512 ENDATA
Problem MODEL has 3414 rows, 11928 columns and 23856 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 1418.75 - 0.01 seconds
Cgl0004I processed model has 1709 rows, 8538 columns (8538 integer (8538 of which binary)) and 17058 elements
Cutoff increment increased from 1e-05 to 0.04995
Cbc0038I Initial state - 0 integers unsatisfied sum - 0
Cbc0038I S

({'Entertainment': 0.8169286831421477,
  'Charity': 0.41629834462377546,
  'Housing': 0.18581299422231556,
  'Home Improvement': 0.1688113139284646,
  'Healthcare': 0.0029587488759623692},
 379.2500000000002)