In [1]:
import pickle
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.tree import DecisionTreeClassifier






In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import pickle

# Load the dataset
data = pd.read_csv('fraud.csv')

# Display the first few rows of the dataset
data.head()



Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
0,1,PAYMENT,9839.64,C1231006815,170136.0,160296.36,M1979787155,0.0,0.0,0,0
1,1,PAYMENT,1864.28,C1666544295,21249.0,19384.72,M2044282225,0.0,0.0,0,0
2,1,TRANSFER,181.0,C1305486145,181.0,0.0,C553264065,0.0,0.0,1,0
3,1,CASH_OUT,181.0,C840083671,181.0,0.0,C38997010,21182.0,0.0,1,0
4,1,PAYMENT,11668.14,C2048537720,41554.0,29885.86,M1230701703,0.0,0.0,0,0


In [3]:
print(data.shape)

(6362620, 11)


In [4]:
# Handle missing values by filling them with zeros
data = data.fillna(0)

# Extract hour from 'step' (assuming 'step' represents hours)
data['hour'] = (data['step'] % 24).astype(int)

# Define numerical and categorical features
categorical_features = ['type']
numerical_features = ['amount', 'oldbalanceOrg', 'newbalanceOrig', 'oldbalanceDest', 'newbalanceDest', 'hour']

# Split data into features and target
X = data[numerical_features + categorical_features]
y = data['isFraud']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Display shapes of the training and testing sets
X_train.shape, X_test.shape


((4453834, 7), (1908786, 7))

In [5]:
# Preprocessing pipeline
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ])

# Define the model pipeline
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(n_estimators=100, random_state=42))
])

# Train the model
model.fit(X_train, y_train)


In [6]:
# Predict on test data
y_pred = model.predict(X_test)

# Evaluate the model
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

# Display confusion matrix and classification report
print(conf_matrix)
print(class_report)


[[1906309      42]
 [    480    1955]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00   1906351
           1       0.98      0.80      0.88      2435

    accuracy                           1.00   1908786
   macro avg       0.99      0.90      0.94   1908786
weighted avg       1.00      1.00      1.00   1908786



In [7]:
def predict_transaction(transaction):
    # Ensure the transaction data is in the correct format
    transaction_df = pd.DataFrame([transaction])
    transaction_df = transaction_df[numerical_features + categorical_features]
    
    # Preprocess and predict
    prediction = model.predict(transaction_df)
    return prediction[0]

# Example transaction
transaction = {
    'amount': 1060.31,
    'oldbalanceOrg': 1089.0,
    'newbalanceOrig': 28.69,
    'oldbalanceDest': 0.0,
    'newbalanceDest': 0.0,
    'hour': 1,  # Assuming hour 1 for demonstration
    'type': 'TRANSFER'
}

# Predict fraud for the example transaction
prediction = predict_transaction(transaction)
print("Fraud" if prediction else "Not Fraud")


Fraud


In [8]:
# Save the model to a file
with open('fraud_model.pkl', 'wb') as f:
    pickle.dump(model, f)

# Load the model from a file
with open('fraud_model.pkl', 'rb') as f:
    loaded_model = pickle.load(f)

# Verify that the loaded model works
loaded_model.predict(X_test)


array([0, 0, 0, ..., 0, 0, 0], dtype=int64)

In [9]:

# Display the first few rows of the dataset
data.head()

# Add feature for account takeover: number of transactions per account
data['transactions_per_account'] = data.groupby('nameOrig')['nameOrig'].transform('count')

# Add feature for phishing and social engineering: number of transactions per hour
data['transactions_per_hour'] = data.groupby(['nameOrig', 'hour'])['nameOrig'].transform('count')

# Add feature for unauthorized transactions: average transaction amount per account
data['avg_transaction_amount'] = data.groupby('nameOrig')['amount'].transform('mean')

# Add feature for money laundering: amount transferred to new accounts
data['is_new_account'] = data['oldbalanceDest'] == 0

# Add feature for identity theft: transactions to unfamiliar accounts
data['transactions_per_destination'] = data.groupby('nameDest')['nameDest'].transform('count')

# Add feature for SIM swap fraud: rapid change in transaction pattern
data['change_in_transaction_pattern'] = data['amount'].diff().fillna(0)

# Define numerical and categorical features
categorical_features = ['type']
numerical_features = ['amount', 'oldbalanceOrg', 'newbalanceOrig', 'oldbalanceDest', 'newbalanceDest', 'hour',
                      'transactions_per_account', 'transactions_per_hour', 'avg_transaction_amount', 'is_new_account',
                      'transactions_per_destination', 'change_in_transaction_pattern']


In [10]:

def predict_transaction(transaction):
    # Ensure the transaction data is in the correct format
    transaction_df = pd.DataFrame([transaction])
    transaction_df = transaction_df[numerical_features + categorical_features]

    # Preprocess and predict
    prediction = model.predict(transaction_df)
    return prediction[0]

In [11]:
def detect_fraud_type(transaction):
    """
    Detect the type of fraud based on transaction details.
    """
    fraud_types = []

    # Check conditions with realistic thresholds
    if transaction['is_new_account'] == 1:
        fraud_types.append("New Account Fraud")
    
    if transaction['transactions_per_account'] > 50:
        fraud_types.append("Account Takeover Fraud")
    
    if transaction['transactions_per_hour'] > 10:
        fraud_types.append("Unauthorized Transaction Fraud")
    
    if transaction['avg_transaction_amount'] > 10000:
        fraud_types.append("Money Laundering")

    if transaction['change_in_transaction_pattern'] > 100:
        fraud_types.append("Change in Transaction Pattern Fraud")

    if not fraud_types:
        fraud_types.append("No Specific Fraud Detected")
    
    return fraud_types




In [12]:
def get_transaction_input():
    """
    Function to get transaction details from user input.
    """
    print("Enter transaction details:")
    
    try:
        amount = float(input("Amount: "))
        oldbalanceOrg = float(input("Old Balance Origin: "))
        newbalanceOrig = float(input("New Balance Origin: "))
        oldbalanceDest = float(input("Old Balance Destination: "))
        newbalanceDest = float(input("New Balance Destination: "))
        hour = int(input("Hour (0-23): "))
        transaction_type = input("Transaction Type (CASH-IN, CASH-OUT, DEBIT, PAYMENT, TRANSFER): ").upper()
        transactions_per_account = int(input("Transactions per Account: "))
        transactions_per_hour = int(input("Transactions per Hour: "))
        avg_transaction_amount = float(input("Average Transaction Amount: "))
        is_new_account = int(input("Is New Account (0/1): "))
        transactions_per_destination = int(input("Transactions per Destination: "))
        change_in_transaction_pattern = float(input("Change in Transaction Pattern: "))

        if transaction_type not in ['CASH-IN', 'CASH-OUT', 'DEBIT', 'PAYMENT', 'TRANSFER']:
            raise ValueError("Invalid transaction type. Must be one of: CASH-IN, CASH-OUT, DEBIT, PAYMENT, TRANSFER.")

        # Prepare the input transaction for prediction
        transaction = {
            'amount': amount,
            'oldbalanceOrg': oldbalanceOrg,
            'newbalanceOrig': newbalanceOrig,
            'oldbalanceDest': oldbalanceDest,
            'newbalanceDest': newbalanceDest,
            'hour': hour,
            'type': transaction_type,
            'transactions_per_account': transactions_per_account,
            'transactions_per_hour': transactions_per_hour,
            'avg_transaction_amount': avg_transaction_amount,
            'is_new_account': is_new_account,
            'transactions_per_destination': transactions_per_destination,
            'change_in_transaction_pattern': change_in_transaction_pattern
        }

        return transaction
    except ValueError as e:
        print(f"Error: {e}")
        return None





In [13]:
# Example transaction with values that should trigger specific fraud detections
transaction = {
    'amount': 5000,
    'oldbalanceOrg': 3000,
    'newbalanceOrig': 500,
    'oldbalanceDest': 100,
    'newbalanceDest': 5500,
    'hour': 21,
    'type': 'TRANSFER',
    'transactions_per_account': 10,  # Update this based on your detection logic
    'transactions_per_hour': 5,
    'avg_transaction_amount': 700,
    'is_new_account': 0,
    'transactions_per_destination': 10,
    'change_in_transaction_pattern': 50
}

# Predict fraud for the example transaction
prediction = predict_transaction(transaction)
print("Fraud" if prediction else "Not Fraud")

# Detect fraud type only if prediction is fraud
if prediction:
    fraud_types = detect_fraud_type(transaction)
    print("Detected Fraud Types: ", ", ".join(fraud_types))


Not Fraud


In [None]:
def main():
    """
    Main function to run the fraud detection script with user input.
    """
    # Load the trained model
    with open('fraud_model.pkl', 'rb') as f:
        loaded_model = pickle.load(f)
    
    while True:
        # Get transaction input from the user
        transaction = get_transaction_input()
        
        if transaction:
                # Predict fraud for the transaction
                prediction = predict_transaction(transaction)
                print("Fraud" if prediction else "Not Fraud")
                
                # Detect fraud type based on transaction details
                fraud_types = detect_fraud_type(transaction)
                print("Detected Fraud Types: ", ", ".join(fraud_types))
        
        # Option to exit
        cont = input("Do you want to enter another transaction? (yes/no): ").lower()
        if cont != 'yes':
            break

if __name__ == "__main__":
    main()






Enter transaction details:
Amount: 5000
Old Balance Origin: 3000
New Balance Origin: 500
Old Balance Destination: 100
New Balance Destination: 
Error: could not convert string to float: ''
Do you want to enter another transaction? (yes/no): yes
Enter transaction details:
Amount: 5000
Old Balance Origin: 3000
New Balance Origin: 100
Old Balance Destination: 
Error: could not convert string to float: ''
Do you want to enter another transaction? (yes/no): yes
Enter transaction details:
Amount: 5000
Old Balance Origin: 3000
New Balance Origin: 500
Old Balance Destination: 100
New Balance Destination: 5500
Hour (0-23): 21
Transaction Type (CASH-IN, CASH-OUT, DEBIT, PAYMENT, TRANSFER): TRANSFER
Transactions per Account: 10
Transactions per Hour: 5
Average Transaction Amount: 700
Is New Account (0/1): 0
Transactions per Destination: 10
Change in Transaction Pattern: 50
Not Fraud
Detected Fraud Types:  No Specific Fraud Detected
Do you want to enter another transaction? (yes/no): YES
Enter tra