In [9]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
!pip install gdown
import gdown

# Download data
# The CSV is fetched from Google Drive by file id. The download is skipped when
# the file is already on disk, so re-running the cell does not pull the whole
# dataset again.

file_id = "1Ak1FSqqgWb5jZVSSyFI6PuWml14K0rI3"
url = f"https://drive.google.com/uc?id={file_id}"
if not Path("transactions_modified.csv").exists():
  gdown.download(url, "transactions_modified.csv", quiet=False)


# Load the data
# The file is decoded as latin-1; the first rows are printed as a sanity check.
transactions = pd.read_csv('transactions_modified.csv', encoding='latin-1')
print(transactions.head())

# How many fraudulent transactions?
# The label column is selected by name (the same 'isFraud' column used as the
# training label) instead of by position, and summed in one vectorized call
# rather than a Python loop over every row. Summing the column counts the
# flagged rows, assuming the label is coded 0/1 as in the printed sample.
fraud = transactions['isFraud']
total_f_transactions = fraud.sum()

print(total_f_transactions)

# Summary statistics on amount column
# Select the column by name so the statistics do not depend on column order.
amounts = transactions['amount']

# Mean:
# Vectorized sum instead of a per-row Python loop; the result can differ from a
# sequential float accumulation in the last few digits.
total = amounts.sum()

print("Mean transaction amount =", total/len(amounts))

# Standard Deviation:
# ddof = 0 gives the population standard deviation (divides by N, not N - 1).

std_d = np.std(amounts, ddof = 0)
print("SD of transaction amount =", std_d)

# Create isPayment field
# 1 when the transaction type is PAYMENT or DEBIT, otherwise 0.
# Vectorized membership test instead of a per-row lambda.
transactions['isPayment'] = transactions['type'].isin(['PAYMENT', 'DEBIT']).astype(int)

# Create isMovement field
# 1 when the transaction type is CASH_OUT or TRANSFER, otherwise 0.
transactions['isMovement'] = transactions['type'].isin(['CASH_OUT', 'TRANSFER']).astype(int)

# Create accountDiff field
# Signed difference: destination's prior balance minus origin's prior balance.
# NOTE(review): the sign is kept, so negative values are possible - confirm a
# signed (rather than absolute) difference is what the model should see.
transactions['accountDiff'] = transactions['oldbalanceDest'] - transactions['oldbalanceOrg']

# Create features and label variables
# Only the feature matrix is built here; the label column ('isFraud') is read
# straight from `transactions` where the data is split.
features = transactions[['amount', 'isPayment', 'isMovement', 'accountDiff']]

# Split dataset
# 30% of the rows are held out for testing; the fixed random_state makes the
# split repeatable across runs.
features_train, features_test, labels_train, labels_test = train_test_split(
  features,
  transactions['isFraud'],
  test_size=0.3,
  random_state=42,
)

# Normalize the features variables
# The scaler learns its statistics from the training rows only and then
# applies the same transformation to both partitions.
scaler = StandardScaler()
scaler.fit(features_train)
features_train_scaled = scaler.transform(features_train)
features_test_scaled = scaler.transform(features_test)


# Fit the model to the training data
# Logistic regression with default settings, trained on the standardized
# training features.
logr = LogisticRegression()
logr.fit(features_train_scaled, labels_train)

# Class predictions for the held-out rows. The score calls below do not use
# this variable; they take the feature matrix and the true labels directly.
labels_predicted = logr.predict(features_test_scaled)

# Score the model on the training data
print("Training set accuracy =", logr.score(features_train_scaled, labels_train))

# Score the model on the test data
print("Testing set accuracy =", logr.score(features_test_scaled, labels_test))

# Print the model coefficients
# One weight per feature, in the column order of `features`; the weights are
# relative to the standardized inputs the model was trained on.
print("Weights of variables: Transaction Amount, Payment Type (Payment/Debit or not), Movement Type (Cash Out/Transfer or not) and Account Difference:", logr.coef_)

# New transaction data
# Each vector follows the training column order:
# [amount, isPayment, isMovement, accountDiff]
transaction1 = np.array([123456.78, 0.0, 1.0, 54670.1])
transaction2 = np.array([98765.43, 1.0, 0.0, 8524.75])
transaction3 = np.array([543678.31, 1.0, 0.0, 510025.5])

# Create a new transaction
transaction4 = np.array([130000, 1.0, 0.0, 129500])

# Combine new transactions into a single array, one ROW per transaction.
# np.column_stack would turn each 1-D vector into a column, giving the scaler a
# features-by-transactions matrix; that mistake is silent here only because
# there happen to be 4 transactions and 4 features. Stack vertically instead.
trans = np.vstack((transaction1, transaction2, transaction3, transaction4))

# Normalize the new transactions
# The scaler was fitted on a DataFrame, so the rows are labelled with the same
# feature columns before transforming (keeps column order explicit and avoids
# a feature-name mismatch warning).
trans_scaled = scaler.transform(pd.DataFrame(trans, columns=features.columns))

# Predict fraud on the new transactions
outcomes = logr.predict(trans_scaled)
labels = np.where(outcomes == 1, 'Fraudulent', 'Legitimate')
print("Transaction Predictions:", labels)

# predict_proba returns one row per transaction; columns follow the model's
# class order (0 = legitimate, 1 = fraudulent).
print('P(Legitimate) P(Fraudulent)')
print(logr.predict_proba(trans_scaled))



Downloading...
From (original): https://drive.google.com/uc?id=1Ak1FSqqgWb5jZVSSyFI6PuWml14K0rI3
From (redirected): https://drive.google.com/uc?id=1Ak1FSqqgWb5jZVSSyFI6PuWml14K0rI3&confirm=t&uuid=44e37dee-b6ef-477f-92ba-884eb5ce8e5b
To: /content/transactions_modified.csv
100%|██████████| 494M/494M [00:07<00:00, 65.2MB/s]


   step      type    amount     nameOrig  oldbalanceOrg  newbalanceOrig  \
0     1   PAYMENT   9839.64  C1231006815       170136.0       160296.36   
1     1   PAYMENT   1864.28  C1666544295        21249.0        19384.72   
2     1  TRANSFER    181.00  C1305486145          181.0            0.00   
3     1  CASH_OUT    181.00   C840083671          181.0            0.00   
4     1   PAYMENT  11668.14  C2048537720        41554.0        29885.86   

      nameDest  oldbalanceDest  newbalanceDest  isFraud  isFlaggedFraud  
0  M1979787155             0.0             0.0        0               0  
1  M2044282225             0.0             0.0        0               0  
2   C553264065             0.0             0.0        1               0  
3    C38997010         21182.0             0.0        1               0  
4  M1230701703             0.0             0.0        0               0  
8213
Mean transaction amount = 179861.90354912292
SD of transaction amount = 603858.1840094082
Training s

