In [4]:
import seaborn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Download data

url = "https://drive.google.com/file/d/1Ak1FSqqgWb5jZVSSyFI6PuWml14K0rI3/view?usp=sharing"
gdown.download(url, "transactions_modified.csv", quiet=False)


# Load the data
transactions = pd.read_csv('transactions_modified.csv')
print(transactions.head())
print(transactions.info())

# How many fraudulent transactions?
total_f_transactions = 0
fraud = transactions.iloc[:, 9]
for i in range(len(fraud)):
  total_f_transactions += fraud[i]

print(total_f_transactions)

# Summary statistics on amount column
# Mean:
total = 0
amounts = transactions.iloc[:, 2]

for i in range(len(amounts)):
  total += amounts[i]

print(total/len(amounts))

# Standard Deviation:

std_d = np.std(amounts, ddof = 0)
print(std_d)

# Create isPayment field
transactions['isPayment'] = transactions['type'].apply(lambda x: 1 if x == 'PAYMENT' or x == 'DEBIT' else 0)

# Create isMovement field
transactions['isMovement'] = transactions['type'].apply(lambda x: 1 if x == 'CASH_OUT' or x == 'TRANSFER' else 0)

# Create accountDiff field
transactions['accountDiff'] = transactions['oldbalanceDest'] - transactions['oldbalanceOrg']

# Create features and label variables
features = transactions[['amount', 'isPayment', 'isMovement', 'accountDiff']]

# Split dataset
features_train, features_test, labels_train, labels_test = train_test_split(features, transactions['isFraud'], test_size=0.3, random_state=42)

# Normalize the features variables
scaler = StandardScaler()
features_train_scaled = scaler.fit_transform(features_train)
features_test_scaled = scaler.transform(features_test)


# Fit the model to the training data
logr = LogisticRegression()
logr.fit(features_train_scaled, labels_train)
labels_predicted = logr.predict(features_test_scaled)
labels_predicted = labels_predicted

# Score the model on the training data
print(logr.score(features_train_scaled, labels_train))

# Score the model on the test data
print(logr.score(features_test_scaled, labels_test))

# Print the model coefficients
print(logr.coef_)

# New transaction data
transaction1 = np.array([123456.78, 0.0, 1.0, 54670.1])
transaction2 = np.array([98765.43, 1.0, 0.0, 8524.75])
transaction3 = np.array([543678.31, 1.0, 0.0, 510025.5])

# Create a new transaction
transaction4 = np.array([130000, 1.0, 0.0, 129500])

# Combine new transactions into a single array
trans = np.column_stack((transaction1, transaction2, transaction3, transaction4))

# Normalize the new transactions
trans_scaled = scaler.transform(trans)

# Predict fraud on the new transactions
outcomes = logr.predict(trans_scaled)
print(outcomes)
print(logr.predict_proba(trans_scaled))

FileNotFoundError: [Errno 2] No such file or directory: 'transactions_modified.csv'