<a href="https://colab.research.google.com/github/vraj2010/ML-Projects/blob/main/ML_Projects.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Credit Card Fraud Detection with Machine Learning**

**This project focuses on the detection of fraudulent credit card transactions using a machine learning approach, implemented in Python with the help of scikit-learn. The dataset used is the popular and real-world Credit Card Fraud Detection dataset from Kaggle, which contains anonymized transactions made by European cardholders in September 2013.**

**Due to the highly imbalanced nature of the dataset (only 492 fraud cases out of 284,807 transactions), special consideration is given to model training and evaluation.**

**Dataset Overview**

**Total Samples:** 284,807

**Features**: 30 numerical input features (V1–V28, Time, Amount)

**Target:** Class

*   0 → Legitimate Transaction
*   1 → Fraudulent Transaction

**Technologies Used**

Logistic Regression (with ElasticNet regularization)

1.   Python
2.   Pandas & NumPy
3.   Scikit-learn
4.   Joblib

In [48]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score,confusion_matrix
import joblib

In [49]:
df=pd.read_csv("/content/sample_data/creditcard.csv")

In [50]:
df.sample(8)

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
154375,101340.0,-2.741313,3.074014,1.622743,3.693433,0.748769,2.610981,-0.341221,-3.399158,0.930505,...,2.23509,-1.915768,0.215161,-0.073342,0.326302,0.082843,0.57571,0.109676,38.88,0
193651,130228.0,1.569489,-0.758732,-1.967495,0.302238,0.273291,-0.581507,0.660462,-0.277869,0.287257,...,0.308537,0.48054,-0.166853,0.735241,0.263828,-0.595455,-0.057494,-0.019832,244.98,0
32993,37045.0,1.27309,-0.744403,1.083617,-0.682701,-1.537089,-0.50275,-1.064374,0.039786,-0.738666,...,0.465367,1.22964,-0.114756,0.604355,0.375965,-0.08367,0.032394,0.022759,34.99,0
279781,169084.0,2.031097,0.209589,-1.745605,0.560624,0.176888,-1.538658,0.311267,-0.41236,0.554523,...,0.200623,0.737398,-0.03474,-0.020127,0.254203,-0.10601,-0.002224,-0.026072,16.35,0
155465,105509.0,-0.866582,0.025573,1.1841,-3.529749,-0.630057,-1.137227,0.961144,-0.17823,3.175991,...,0.164169,0.715842,-0.083379,-0.030046,0.430528,-0.942468,0.318127,0.189065,140.08,0
246538,153216.0,2.090978,-0.115276,-1.467317,0.059333,0.427308,-0.301085,0.072752,-0.133147,0.323707,...,-0.302364,-0.759341,0.199356,-1.031964,-0.186239,0.24106,-0.074714,-0.080025,1.98,0
251537,155385.0,1.946365,-0.316224,-1.107447,-0.076527,0.125203,-0.147217,-0.143187,0.019026,0.300051,...,-0.40589,-1.357353,0.496803,0.238536,-0.716566,-0.008529,-0.082923,-0.047908,49.61,0
229204,145836.0,2.208234,-1.732466,0.120096,-1.370899,-2.229707,-0.52291,-1.865555,0.048544,-0.383152,...,-0.150729,0.055042,0.354016,-0.014619,-0.592115,-0.245385,0.057414,-0.030136,24.9,0


In [51]:
df.shape

(284807, 31)

In [52]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 284807 entries, 0 to 284806
Data columns (total 31 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   Time    284807 non-null  float64
 1   V1      284807 non-null  float64
 2   V2      284807 non-null  float64
 3   V3      284807 non-null  float64
 4   V4      284807 non-null  float64
 5   V5      284807 non-null  float64
 6   V6      284807 non-null  float64
 7   V7      284807 non-null  float64
 8   V8      284807 non-null  float64
 9   V9      284807 non-null  float64
 10  V10     284807 non-null  float64
 11  V11     284807 non-null  float64
 12  V12     284807 non-null  float64
 13  V13     284807 non-null  float64
 14  V14     284807 non-null  float64
 15  V15     284807 non-null  float64
 16  V16     284807 non-null  float64
 17  V17     284807 non-null  float64
 18  V18     284807 non-null  float64
 19  V19     284807 non-null  float64
 20  V20     284807 non-null  float64
 21  V21     28

In [53]:
df["Class"].value_counts()

Unnamed: 0_level_0,count
Class,Unnamed: 1_level_1
0,284315
1,492


In [54]:
legit=df[df["Class"]==0]
fraud=df[df["Class"]==1]

In [55]:
legit.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [56]:
fraud.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
541,406.0,-2.312227,1.951992,-1.609851,3.997906,-0.522188,-1.426545,-2.537387,1.391657,-2.770089,...,0.517232,-0.035049,-0.465211,0.320198,0.044519,0.17784,0.261145,-0.143276,0.0,1
623,472.0,-3.043541,-3.157307,1.088463,2.288644,1.359805,-1.064823,0.325574,-0.067794,-0.270953,...,0.661696,0.435477,1.375966,-0.293803,0.279798,-0.145362,-0.252773,0.035764,529.0,1
4920,4462.0,-2.30335,1.759247,-0.359745,2.330243,-0.821628,-0.075788,0.56232,-0.399147,-0.238253,...,-0.294166,-0.932391,0.172726,-0.08733,-0.156114,-0.542628,0.039566,-0.153029,239.93,1
6108,6986.0,-4.397974,1.358367,-2.592844,2.679787,-1.128131,-1.706536,-3.496197,-0.248778,-0.247768,...,0.573574,0.176968,-0.436207,-0.053502,0.252405,-0.657488,-0.827136,0.849573,59.0,1
6329,7519.0,1.234235,3.01974,-4.304597,4.732795,3.624201,-1.357746,1.713445,-0.496358,-1.282858,...,-0.379068,-0.704181,-0.656805,-1.632653,1.488901,0.566797,-0.010016,0.146793,1.0,1


In [57]:
legit["Amount"].describe()

Unnamed: 0,Amount
count,284315.0
mean,88.291022
std,250.105092
min,0.0
25%,5.65
50%,22.0
75%,77.05
max,25691.16


In [58]:
fraud["Amount"].describe()

Unnamed: 0,Amount
count,492.0
mean,122.211321
std,256.683288
min,0.0
25%,1.0
50%,9.25
75%,105.89
max,2125.87


In [59]:
X=df.drop(columns="Class",axis=1)
Y=df["Class"]

In [60]:
scaler=StandardScaler()

In [61]:
X=scaler.fit_transform(X)

In [62]:
X

array([[-1.99658302, -0.69424232, -0.04407492, ...,  0.33089162,
        -0.06378115,  0.24496426],
       [-1.99658302,  0.60849633,  0.16117592, ..., -0.02225568,
         0.04460752, -0.34247454],
       [-1.99656197, -0.69350046, -0.81157783, ..., -0.13713686,
        -0.18102083,  1.16068593],
       ...,
       [ 1.6419735 ,  0.98002374, -0.18243372, ...,  0.01103672,
        -0.0804672 , -0.0818393 ],
       [ 1.6419735 , -0.12275539,  0.32125034, ...,  0.26960398,
         0.31668678, -0.31324853],
       [ 1.64205773, -0.27233093, -0.11489898, ..., -0.00598394,
         0.04134999,  0.51435531]])

In [63]:
x_train,x_test,y_train,y_test=train_test_split(X,Y,test_size=0.2,random_state=2)

In [64]:
print(X.shape,x_train.shape,x_test.shape)

(284807, 30) (227845, 30) (56962, 30)


In [65]:
lor=LogisticRegression(class_weight="balanced",penalty="elasticnet",l1_ratio=0.3,solver="saga")# by default penalty=l2

In [66]:
lor.fit(x_train,y_train)



In [68]:
accuracy_score(y_train,lor.predict(x_train))

0.9775988062059734

In [69]:
y_pred=lor.predict(x_test)

In [70]:
accuracy_score(y_test,y_pred)

0.9774937677750078

In [71]:
confusion_matrix(y_test,y_pred)

array([[55602,  1276],
       [    6,    78]])

In [72]:
scaler=joblib.dump(scaler,"scaler.pkl")

In [73]:
def standardize_input(input_data):

    features_names=df.drop(columns="Class",axis=1).columns

    input_data=pd.DataFrame([input_data],columns=features_names)

    scaler=joblib.load("scaler.pkl")

    input_data=scaler.transform(input_data)
    return input_data

In [74]:
def predictive_system(input_data):

  input_data=standardize_input(input_data)

  prediction=lor.predict(input_data)

  if(prediction[0]==0):
    print("The transaction is legit")
  else:
    print("The transaction is fraudulent")

In [75]:
input_data=(406,-2.3122265423263,1.95199201064158,-1.60985073229769,3.9979055875468,-0.522187864667764,-1.42654531920595,-2.53738730624579,1.39165724829804,-2.77008927719433,-2.77227214465915,3.20203320709635,-2.89990738849473,-0.595221881324605,-4.28925378244217,0.389724120274487,-1.14074717980657,-2.83005567450437,-0.0168224681808257,0.416955705037907,0.126910559061474,0.517232370861764,-0.0350493686052974,-0.465211076182388,0.320198198514526,0.0445191674731724,0.177839798284401,0.261145002567677,-0.143275874698919,0)
predictive_system(input_data)

The transaction is fraudulent
