## MMCS Project | Fraud Detection Model

In [71]:
# Import the necessary libraries
import pandas as pd
from pulp import *
from fraud_detection import models
from fraud_detection import limits
from importlib import reload

In [72]:
# Read the transactions and frauds tables from the local data/ directory
transactions, frauds = (
    pd.read_parquet(parquet_path)
    for parquet_path in ('data/transactions.parquet', 'data/frauds.parquet')
)

In [73]:
# Distinct dates in order of first appearance. Listing the raw column would
# repeat each date once per row, so a slice such as days[:10] would pick the
# dates of the first ten rows instead of ten different days.
days = list(transactions['date'].drop_duplicates())

# IDs listed in the frauds table, collected into a set
fraudulent_transactions = set(frauds['transaction_id'])

In [74]:
# Keep only the rows whose date equals the first entry of `days`
transactions_day1 = transactions.loc[transactions['date'].eq(days[0])]

In [75]:
# Distinct category labels present in the day-1 transactions
categories = [*transactions_day1['category'].unique()]
categories

['Utilities',
 'Housing',
 'Online Shopping',
 'Shopping',
 'Transportation',
 'Dining Out',
 'Transfers',
 'Groceries',
 'Streaming Services',
 'Electronics',
 'Credit Card Payment',
 'Healthcare',
 'Bank Fees',
 'Charity',
 'Home Improvement',
 'Loan Payment',
 'Holiday',
 'Investment',
 'Entertainment']

In [76]:
import random

# Seed the global generator so the sampled categories and ratios are the same
# on every Restart & Run All (the seed value itself is arbitrary).
random.seed(42)

# Pick 5 distinct categories at random
selected_categories = random.sample(categories, 5)

# Draw one ratio per selected category, uniformly from [0, 1]
rand_ratios = [random.uniform(0, 1) for _ in selected_categories]
rand_ratios

[0.8708730910770769,
 0.7421266774356777,
 0.08375794543127058,
 0.4365190540562639,
 0.7780692973336446]

In [77]:
# Reload fraud_detection.models and keep the refreshed module under `modelling`
modelling = reload(models)

# Sample limits_category mapping: category name -> limit value
example_limits_category = {
    'Utilities': 0.3,
    'Housing': 0.1,
    'Shopping': 0.04,
}

# Sample daily budget
budget_daily_example = 10_000

In [78]:
# Take the first 10 entries of `days` as a small test subset
days_test = days[0:10]

In [79]:
import random

In [80]:
# Draw a single uniform sample from [0, 1]; shown as the cell output
random.uniform(0.0, 1.0)

0.5437935196401864

In [81]:
# Re-import the limits module and rebind the name to the reloaded module object
limits = reload(limits)
# NOTE(review): the arguments are presumably (candidate categories, how many to
# pick, lower bound, upper bound) — confirm against fraud_detection.limits.generate.
# The displayed result is a dict of 5 categories mapped to values between 0 and 1.
limits.generate(categories, 5, 0, 1)

{'Entertainment': 0.9432330629129247,
 'Streaming Services': 0.7805718809072995,
 'Bank Fees': 0.4943547922701773,
 'Home Improvement': 0.5638931220816672,
 'Dining Out': 0.7177192330386045}

In [82]:
# Display the category list again for reference
categories

['Utilities',
 'Housing',
 'Online Shopping',
 'Shopping',
 'Transportation',
 'Dining Out',
 'Transfers',
 'Groceries',
 'Streaming Services',
 'Electronics',
 'Credit Card Payment',
 'Healthcare',
 'Bank Fees',
 'Charity',
 'Home Improvement',
 'Loan Payment',
 'Holiday',
 'Investment',
 'Entertainment']

In [83]:
from fraud_detection import tuning

In [84]:
# Call tuning.find_minimizer_random on the day-1 transactions with the example
# budget, the set of fraudulent transaction IDs and the category list.
# NOTE(review): the trailing 10 is presumably the number of random limit
# configurations to try — confirm against tuning.find_minimizer_random.
# The displayed result is a (category -> limit dict, numeric value) tuple; the
# CBC solver log is also printed to the cell output.
tuning.find_minimizer_random(transactions_day1, budget_daily_example, fraudulent_transactions, categories, 10)

Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /Users/uzaykaradag/.pyenv/versions/3.12.0/lib/python3.12/site-packages/pulp/solverdir/cbc/osx/64/cbc /var/folders/_g/3jdpr0d14q1g62hkp6vf5mlh0000gn/T/fb4369835511456682570b1d0be5fe11-pulp.mps max timeMode elapsed branch printingOptions all solution /var/folders/_g/3jdpr0d14q1g62hkp6vf5mlh0000gn/T/fb4369835511456682570b1d0be5fe11-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 3419 COLUMNS
At line 51168 RHS
At line 54583 BOUNDS
At line 66512 ENDATA
Problem MODEL has 3414 rows, 11928 columns and 23856 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 1418.75 - 0.01 seconds
Cgl0004I processed model has 1709 rows, 8538 columns (8538 integer (8538 of which binary)) and 17058 elements
Cutoff increment increased from 1e-05 to 0.04995
Cbc0038I Initial state - 0 integers unsatisfied sum - 0
Cbc0038I S

({'Housing': 0.15099764898154866,
  'Loan Payment': 0.5598623493797966,
  'Healthcare': 0.47722319415998704,
  'Transfers': 0.048321848114434296,
  'Online Shopping': 0.40380053031665575},
 265.35000000000036)