# CREDIT CARD FRAUD DETECTION

### Importing all the libraries

In [None]:
import seaborn as sns
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from  sklearn.ensemble import RandomForestClassifier

### Load and Explore data

In [None]:
# Load the training and test splits; the first row of each CSV is the header.
df_train, df_test = (
    pd.read_csv(csv_path, header=0)
    for csv_path in (
        'E:/Encryptix/Credit_Card_Fraud_Detection/Dataset/Train.csv',
        'E:/Encryptix/Credit_Card_Fraud_Detection/Dataset/Test.csv',
    )
)

In [None]:
# Preview the first rows of the training frame (head() shows 5 rows by default).
df_train.head()

In [None]:
# Preview the first rows of the test frame (head() shows 5 rows by default).
df_test.head()

In [None]:
# Preview the last rows of the training frame (tail() shows 5 rows by default).
df_train.tail()

In [None]:
# (number of rows, number of columns) of the training frame.
df_train.shape

In [None]:
# (number of rows, number of columns) of the test frame.
df_test.shape

In [None]:
# Total number of cells (rows x columns) in the training frame.
df_train.size

In [None]:
# Total number of cells (rows x columns) in the test frame.
df_test.size

In [None]:
# Print column names, non-null counts, dtypes and memory usage of the training frame.
df_train.info()

In [None]:
# Print column names, non-null counts, dtypes and memory usage of the test frame.
df_test.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the training frame's numeric columns.
df_train.describe()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the test frame's numeric columns.
df_test.describe()

In [None]:
# True if at least one cell anywhere in the training frame is missing.
df_train.isna().to_numpy().any()

In [None]:
# True if at least one cell anywhere in the test frame is missing.
df_test.isna().to_numpy().any()

In [None]:
# Number of non-null values in each column of the training frame.
df_train.count()

In [None]:
# Number of non-null values in each column of the test frame.
df_test.count()

In [None]:
# Stack the two splits row-wise into one frame. Each CSV was read with its own
# default 0..n-1 index, so a plain concat would leave duplicate index labels
# (ambiguous .loc lookups, surprising index-aligned assignments).
# ignore_index=True renumbers the combined rows 0..len-1 instead.
df_combined = pd.concat([df_train, df_test], axis=0, ignore_index=True)

In [None]:
# Preview the first rows of the combined frame.
df_combined.head()

In [None]:
# (number of rows, number of columns) of the combined frame.
df_combined.shape

In [None]:
# Total number of cells (rows x columns) in the combined frame.
df_combined.size

In [None]:
# Print column names, non-null counts, dtypes and memory usage of the combined frame.
df_combined.info()

In [None]:
# Remove columns that are not used as model inputs further down: judging by
# their names these are personal-name, job, birth-date, transaction-id,
# address and timestamp fields. The frame is modified in place.
df_combined.drop(
    columns=[
        "first", "last", "job", "dob", "trans_num",
        "street", "trans_date_trans_time", "city", "state",
    ],
    inplace=True,
)

In [None]:
# Preview the combined frame again to check the remaining columns after the drop.
df_combined.head()

### Data Visualization

In [None]:
# Bar chart of the number of rows per 'gender' value; the title is set
# directly on the Axes that countplot draws on.
sns.countplot(data=df_combined, x='gender').set_title("Gender Distribution")
plt.show()

In [None]:
# Pairwise correlation is only defined for numeric data, and df_combined still
# contains text columns at this point ('gender' is plotted as a category above;
# 'merchant' and 'category' are only label-encoded later). Since pandas 2.0,
# DataFrame.corr() raises on non-numeric columns instead of silently skipping
# them, so restrict the frame to its numeric columns first.
correlation_matrix = df_combined.select_dtypes(include="number").corr()

# Draw the matrix as a heatmap; annot=False leaves the cells unlabeled, so the
# fmt argument only matters if annotations are switched on.
plt.figure(figsize = (12,8))
sns.heatmap(correlation_matrix, cmap = 'coolwarm', annot = False,  fmt=".2f")
plt.title("Correlation Matrix")
plt.show()

### Preprocess the Data

In [None]:
# Swap the 'merchant' column for an integer-coded 'merchant_new' column:
# LabelEncoder maps each distinct value to an integer code, the codes are
# appended as a new column, and the original column is removed in place.
encoder = LabelEncoder()
df_combined["merchant_new"] = encoder.fit_transform(df_combined["merchant"].values)
df_combined.drop(columns=["merchant"], inplace=True)

In [None]:
# Swap the 'category' column for an integer-coded 'category_new' column using
# a fresh LabelEncoder, then remove the original column in place.
encoder = LabelEncoder()
df_combined["category_new"] = encoder.fit_transform(df_combined["category"].values)
df_combined.drop(columns=["category"], inplace=True)

In [None]:
# One-hot encode whatever text/categorical columns are still in the frame, then
# discard the 'gender_F' indicator (presumably leaving 'gender_M' as the single
# gender flag -- confirm no other text columns remain, since get_dummies would
# expand those as well).
df_combined = pd.get_dummies(df_combined).drop(columns=['gender_F'])

In [None]:
# Preview the combined frame after the encoding steps.
df_combined.head()

In [None]:
# Separate the target column 'is_fraud' (y) from the remaining columns (X).
y = df_combined["is_fraud"]
X = df_combined.drop(columns=["is_fraud"])

In [None]:
# Hold out 20% of the rows for evaluation. stratify=y keeps the class ratio of
# 'is_fraud' identical in both partitions; with a rare positive class
# (presumably the case for fraud labels -- confirm with y.value_counts()) a
# plain random split can leave the test set with an unrepresentative share of
# fraud rows. random_state pins the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Standardize the features. The scaler is fitted on the training rows only and
# its statistics are reused for the test rows, so nothing from the held-out
# data influences the transformation.
Scaler = StandardScaler()
X_train = Scaler.fit_transform(X_train)
X_test = Scaler.transform(X_test)

### Train Logistic Regression Model

In [None]:
# Logistic regression classifier with scikit-learn's default settings.
lr_model = LogisticRegression()

In [None]:
# Fit the logistic regression on the scaled training features and labels.
lr_model.fit(X_train, y_train)

In [None]:
# Predicted class labels for the scaled test features.
lr_predictions = lr_model.predict(X_test)

In [None]:
# Report test-set performance of the logistic regression: heading, confusion
# matrix and per-class precision/recall/F1 (one item per line), then accuracy.
print(
    "Logistic Regression Model: ",
    confusion_matrix(y_test, lr_predictions),
    classification_report(y_test, lr_predictions),
    sep="\n",
)
print("Accuracy: ", accuracy_score(y_test, lr_predictions))

### Train Decision Tree Model

In [None]:
# Decision tree classifier. random_state is fixed (same seed as the data split
# and the random forest in this notebook) because tree construction breaks
# ties between equally good splits at random; without a seed the fitted tree
# and its scores can differ from run to run.
dt_model = DecisionTreeClassifier(random_state=42)

In [None]:
# Fit the decision tree on the scaled training features and labels.
dt_model.fit(X_train, y_train)

In [None]:
# Predicted class labels for the scaled test features.
dt_predictions = dt_model.predict(X_test)

In [None]:
# Report test-set performance of the decision tree: heading, confusion matrix
# and per-class precision/recall/F1 (one item per line), then accuracy.
print(
    "Decision Tree Model: ",
    confusion_matrix(y_test, dt_predictions),
    classification_report(y_test, dt_predictions),
    sep="\n",
)
print("Accuracy: ", accuracy_score(y_test, dt_predictions))

### Train Random Forest Model

In [None]:
# Random forest of 100 trees; n_jobs=-1 lets scikit-learn use all available
# CPU cores, and random_state=42 makes the fitted forest reproducible.
rf_model = RandomForestClassifier(n_estimators=100, n_jobs=-1, random_state=42)

In [None]:
# Fit the random forest on the scaled training features and labels.
rf_model.fit(X_train, y_train)

In [None]:
# Predicted class labels for the scaled test features.
rf_predictions = rf_model.predict(X_test)

In [None]:
# Report test-set performance of the random forest: heading, confusion matrix
# and per-class precision/recall/F1 (one item per line), then accuracy.
print(
    "Random Forest Model: ",
    confusion_matrix(y_test, rf_predictions),
    classification_report(y_test, rf_predictions),
    sep="\n",
)
print("Accuracy: ", accuracy_score(y_test, rf_predictions))