# Credit Card Fraud Detection

## Importing Dependencies

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

## Importing Dataset

In [2]:
df = pd.read_csv('../Datasets/creditcard.csv')

## Exploring the Dataset

In [3]:
df.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [4]:
df.shape

(284807, 31)

In [5]:
df.tail()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
284802,172786.0,-11.881118,10.071785,-9.834783,-2.066656,-5.364473,-2.606837,-4.918215,7.305334,1.914428,...,0.213454,0.111864,1.01448,-0.509348,1.436807,0.250034,0.943651,0.823731,0.77,0
284803,172787.0,-0.732789,-0.05508,2.03503,-0.738589,0.868229,1.058415,0.02433,0.294869,0.5848,...,0.214205,0.924384,0.012463,-1.016226,-0.606624,-0.395255,0.068472,-0.053527,24.79,0
284804,172788.0,1.919565,-0.301254,-3.24964,-0.557828,2.630515,3.03126,-0.296827,0.708417,0.432454,...,0.232045,0.578229,-0.037501,0.640134,0.265745,-0.087371,0.004455,-0.026561,67.88,0
284805,172788.0,-0.24044,0.530483,0.70251,0.689799,-0.377961,0.623708,-0.68618,0.679145,0.392087,...,0.265245,0.800049,-0.163298,0.123205,-0.569159,0.546668,0.108821,0.104533,10.0,0
284806,172792.0,-0.533413,-0.189733,0.703337,-0.506271,-0.012546,-0.649617,1.577006,-0.41465,0.48618,...,0.261057,0.643078,0.376777,0.008797,-0.473649,-0.818267,-0.002415,0.013649,217.0,0


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 284807 entries, 0 to 284806
Data columns (total 31 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   Time    284807 non-null  float64
 1   V1      284807 non-null  float64
 2   V2      284807 non-null  float64
 3   V3      284807 non-null  float64
 4   V4      284807 non-null  float64
 5   V5      284807 non-null  float64
 6   V6      284807 non-null  float64
 7   V7      284807 non-null  float64
 8   V8      284807 non-null  float64
 9   V9      284807 non-null  float64
 10  V10     284807 non-null  float64
 11  V11     284807 non-null  float64
 12  V12     284807 non-null  float64
 13  V13     284807 non-null  float64
 14  V14     284807 non-null  float64
 15  V15     284807 non-null  float64
 16  V16     284807 non-null  float64
 17  V17     284807 non-null  float64
 18  V18     284807 non-null  float64
 19  V19     284807 non-null  float64
 20  V20     284807 non-null  float64
 21  V21     28

In [7]:
df.isnull().sum()

Time      0
V1        0
V2        0
V3        0
V4        0
V5        0
V6        0
V7        0
V8        0
V9        0
V10       0
V11       0
V12       0
V13       0
V14       0
V15       0
V16       0
V17       0
V18       0
V19       0
V20       0
V21       0
V22       0
V23       0
V24       0
V25       0
V26       0
V27       0
V28       0
Amount    0
Class     0
dtype: int64

In [8]:
df['Class'].value_counts()

Class
0    284315
1       492
Name: count, dtype: int64

The dataset is highly imbalanced, making it challenging for the Machine Learning model to accurately identify fraudulent data.

Here,

0 ----> Legit Transaction

1 ----> Fraud Transaction

### Separating the Data for Analysis

In [9]:
legit = df[df['Class']==0]

In [10]:
legit

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.166480,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.167170,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.379780,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.108300,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.50,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.206010,0.502292,0.219422,0.215153,69.99,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
284802,172786.0,-11.881118,10.071785,-9.834783,-2.066656,-5.364473,-2.606837,-4.918215,7.305334,1.914428,...,0.213454,0.111864,1.014480,-0.509348,1.436807,0.250034,0.943651,0.823731,0.77,0
284803,172787.0,-0.732789,-0.055080,2.035030,-0.738589,0.868229,1.058415,0.024330,0.294869,0.584800,...,0.214205,0.924384,0.012463,-1.016226,-0.606624,-0.395255,0.068472,-0.053527,24.79,0
284804,172788.0,1.919565,-0.301254,-3.249640,-0.557828,2.630515,3.031260,-0.296827,0.708417,0.432454,...,0.232045,0.578229,-0.037501,0.640134,0.265745,-0.087371,0.004455,-0.026561,67.88,0
284805,172788.0,-0.240440,0.530483,0.702510,0.689799,-0.377961,0.623708,-0.686180,0.679145,0.392087,...,0.265245,0.800049,-0.163298,0.123205,-0.569159,0.546668,0.108821,0.104533,10.00,0


In [11]:
legit['Class'].value_counts()

Class
0    284315
Name: count, dtype: int64

In [12]:
fraud = df[df['Class']==1]

In [13]:
fraud

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
541,406.0,-2.312227,1.951992,-1.609851,3.997906,-0.522188,-1.426545,-2.537387,1.391657,-2.770089,...,0.517232,-0.035049,-0.465211,0.320198,0.044519,0.177840,0.261145,-0.143276,0.00,1
623,472.0,-3.043541,-3.157307,1.088463,2.288644,1.359805,-1.064823,0.325574,-0.067794,-0.270953,...,0.661696,0.435477,1.375966,-0.293803,0.279798,-0.145362,-0.252773,0.035764,529.00,1
4920,4462.0,-2.303350,1.759247,-0.359745,2.330243,-0.821628,-0.075788,0.562320,-0.399147,-0.238253,...,-0.294166,-0.932391,0.172726,-0.087330,-0.156114,-0.542628,0.039566,-0.153029,239.93,1
6108,6986.0,-4.397974,1.358367,-2.592844,2.679787,-1.128131,-1.706536,-3.496197,-0.248778,-0.247768,...,0.573574,0.176968,-0.436207,-0.053502,0.252405,-0.657488,-0.827136,0.849573,59.00,1
6329,7519.0,1.234235,3.019740,-4.304597,4.732795,3.624201,-1.357746,1.713445,-0.496358,-1.282858,...,-0.379068,-0.704181,-0.656805,-1.632653,1.488901,0.566797,-0.010016,0.146793,1.00,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
279863,169142.0,-1.927883,1.125653,-4.518331,1.749293,-1.566487,-2.010494,-0.882850,0.697211,-2.064945,...,0.778584,-0.319189,0.639419,-0.294885,0.537503,0.788395,0.292680,0.147968,390.00,1
280143,169347.0,1.378559,1.289381,-5.004247,1.411850,0.442581,-1.326536,-1.413170,0.248525,-1.127396,...,0.370612,0.028234,-0.145640,-0.081049,0.521875,0.739467,0.389152,0.186637,0.76,1
280149,169351.0,-0.676143,1.126366,-2.213700,0.468308,-1.120541,-0.003346,-2.234739,1.210158,-0.652250,...,0.751826,0.834108,0.190944,0.032070,-0.739695,0.471111,0.385107,0.194361,77.89,1
281144,169966.0,-3.113832,0.585864,-5.399730,1.817092,-0.840618,-2.943548,-2.208002,1.058733,-1.632333,...,0.583276,-0.269209,-0.456108,-0.183659,-0.328168,0.606116,0.884876,-0.253700,245.00,1


In [14]:
fraud['Class'].value_counts()

Class
1    492
Name: count, dtype: int64

### Statistical Measures of the Data

In [15]:
legit["Amount"].describe()

count    284315.000000
mean         88.291022
std         250.105092
min           0.000000
25%           5.650000
50%          22.000000
75%          77.050000
max       25691.160000
Name: Amount, dtype: float64

In [16]:
fraud['Amount'].describe()

count     492.000000
mean      122.211321
std       256.683288
min         0.000000
25%         1.000000
50%         9.250000
75%       105.890000
max      2125.870000
Name: Amount, dtype: float64

The mean value of Fraud data is more than Legit data

### Comparing values of both transaction types

In [17]:
df.groupby('Class').mean()

Unnamed: 0_level_0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount
Class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,94838.202258,0.008258,-0.006271,0.012171,-0.00786,0.005453,0.002419,0.009637,-0.000987,0.004467,...,-0.000644,-0.001235,-2.4e-05,7e-05,0.000182,-7.2e-05,-8.9e-05,-0.000295,-0.000131,88.291022
1,80746.806911,-4.771948,3.623778,-7.033281,4.542029,-3.151225,-1.397737,-5.568731,0.570636,-2.581123,...,0.372319,0.713588,0.014049,-0.040308,-0.10513,0.041449,0.051648,0.170575,0.075667,122.211321


### Under Sampling

We will build a sample dataset containing distribution of Normal transactions and Fraudulent transactions

The number of Normal Transactions: 284315

The number of Fraudulant Transactions: 492

We will randomly select 492 transactions from the normal transactions and combine them with all fraudulent transactions, resulting in a balanced dataset of 492 normal and 492 fraudulent transactions.

In [18]:
legit_sample = legit.sample(n=492)

In [19]:
legit_sample

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
148862,90265.0,1.729933,-1.653311,-0.773886,-1.632008,-1.381941,-1.040844,-0.445591,-0.443636,2.205671,...,-0.752708,-1.629852,0.287511,-0.035015,-0.570053,-0.728893,-0.011889,-0.000043,235.77,0
242035,151304.0,0.087255,0.936303,-0.565227,-0.156631,0.773987,-1.366834,0.978950,-0.253406,0.048382,...,0.266814,0.962572,-0.159725,-0.183094,-0.300983,-0.131824,0.378202,0.192406,19.50,0
11893,20478.0,-0.534639,0.462400,2.614235,1.118064,-0.596837,1.599259,1.436459,-0.510462,2.145376,...,-0.544727,-0.287960,-0.055235,0.179379,-0.590695,-0.791150,-0.433968,-0.588619,195.87,0
15403,26790.0,-0.510225,0.181672,1.449477,-1.939204,0.559337,-0.697591,0.982108,-0.572913,-1.737282,...,0.274250,1.008818,-0.432498,0.277243,0.255431,-0.306282,-0.381809,-0.381949,15.00,0
119539,75486.0,-3.513324,1.896098,0.601442,-1.631377,-3.052883,-0.477448,-2.280466,2.462487,-0.876494,...,-0.028606,0.077921,0.074130,0.412670,0.214519,1.094080,0.084692,0.033503,17.50,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
281677,170350.0,-0.148823,1.038218,1.938359,3.097934,1.259168,1.598648,0.851670,-0.434445,-0.747486,...,0.113990,0.914500,-0.233434,0.038539,-0.698946,0.027108,-0.512512,-0.460831,19.96,0
107947,70690.0,-1.142025,1.518889,0.836469,0.658066,-0.046930,-0.569060,0.494765,0.261603,-0.408308,...,0.077044,0.571575,-0.045704,0.329845,-0.103342,-0.347258,0.337747,0.069302,5.64,0
131934,79796.0,-1.520662,1.085022,1.720737,0.620109,-1.013583,1.431759,-0.764612,1.142216,0.308905,...,0.168465,0.710057,-0.382529,-0.624408,0.480698,0.778289,-0.366999,-0.081271,51.12,0
238617,149744.0,-2.286697,-0.175666,0.646327,1.111830,0.496260,-0.468410,1.927941,-0.792196,-0.330078,...,0.041371,0.901526,0.020175,0.009146,1.095028,-0.179925,-0.920017,0.658070,170.00,0


## Making Dataset for Model

Now, we are concatenating both datasets to create the final dataset for the machine learning model.

In [20]:
final_df = pd.concat([legit_sample, fraud], axis=0)

In [21]:
final_df.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
148862,90265.0,1.729933,-1.653311,-0.773886,-1.632008,-1.381941,-1.040844,-0.445591,-0.443636,2.205671,...,-0.752708,-1.629852,0.287511,-0.035015,-0.570053,-0.728893,-0.011889,-4.3e-05,235.77,0
242035,151304.0,0.087255,0.936303,-0.565227,-0.156631,0.773987,-1.366834,0.97895,-0.253406,0.048382,...,0.266814,0.962572,-0.159725,-0.183094,-0.300983,-0.131824,0.378202,0.192406,19.5,0
11893,20478.0,-0.534639,0.4624,2.614235,1.118064,-0.596837,1.599259,1.436459,-0.510462,2.145376,...,-0.544727,-0.28796,-0.055235,0.179379,-0.590695,-0.79115,-0.433968,-0.588619,195.87,0
15403,26790.0,-0.510225,0.181672,1.449477,-1.939204,0.559337,-0.697591,0.982108,-0.572913,-1.737282,...,0.27425,1.008818,-0.432498,0.277243,0.255431,-0.306282,-0.381809,-0.381949,15.0,0
119539,75486.0,-3.513324,1.896098,0.601442,-1.631377,-3.052883,-0.477448,-2.280466,2.462487,-0.876494,...,-0.028606,0.077921,0.07413,0.41267,0.214519,1.09408,0.084692,0.033503,17.5,0


In [22]:
final_df['Class'].value_counts()

Class
0    492
1    492
Name: count, dtype: int64

In [23]:
final_df.groupby('Class').mean()

Unnamed: 0_level_0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount
Class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,95904.154472,0.020406,-0.002107,0.018263,-0.070979,0.029407,-0.071915,0.032069,0.054924,0.010342,...,0.017698,0.026622,0.030017,-0.017399,0.013122,0.018208,-0.026361,-0.007721,0.005109,87.08498
1,80746.806911,-4.771948,3.623778,-7.033281,4.542029,-3.151225,-1.397737,-5.568731,0.570636,-2.581123,...,0.372319,0.713588,0.014049,-0.040308,-0.10513,0.041449,0.051648,0.170575,0.075667,122.211321


## Splitting the Dataset into Features and Targets

In [24]:
X = final_df.drop('Class', axis=1)
y = final_df['Class']

In [25]:
X

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount
148862,90265.0,1.729933,-1.653311,-0.773886,-1.632008,-1.381941,-1.040844,-0.445591,-0.443636,2.205671,...,-0.113556,-0.752708,-1.629852,0.287511,-0.035015,-0.570053,-0.728893,-0.011889,-0.000043,235.77
242035,151304.0,0.087255,0.936303,-0.565227,-0.156631,0.773987,-1.366834,0.978950,-0.253406,0.048382,...,0.079050,0.266814,0.962572,-0.159725,-0.183094,-0.300983,-0.131824,0.378202,0.192406,19.50
11893,20478.0,-0.534639,0.462400,2.614235,1.118064,-0.596837,1.599259,1.436459,-0.510462,2.145376,...,-0.085373,-0.544727,-0.287960,-0.055235,0.179379,-0.590695,-0.791150,-0.433968,-0.588619,195.87
15403,26790.0,-0.510225,0.181672,1.449477,-1.939204,0.559337,-0.697591,0.982108,-0.572913,-1.737282,...,0.146398,0.274250,1.008818,-0.432498,0.277243,0.255431,-0.306282,-0.381809,-0.381949,15.00
119539,75486.0,-3.513324,1.896098,0.601442,-1.631377,-3.052883,-0.477448,-2.280466,2.462487,-0.876494,...,-0.372509,-0.028606,0.077921,0.074130,0.412670,0.214519,1.094080,0.084692,0.033503,17.50
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
279863,169142.0,-1.927883,1.125653,-4.518331,1.749293,-1.566487,-2.010494,-0.882850,0.697211,-2.064945,...,1.252967,0.778584,-0.319189,0.639419,-0.294885,0.537503,0.788395,0.292680,0.147968,390.00
280143,169347.0,1.378559,1.289381,-5.004247,1.411850,0.442581,-1.326536,-1.413170,0.248525,-1.127396,...,0.226138,0.370612,0.028234,-0.145640,-0.081049,0.521875,0.739467,0.389152,0.186637,0.76
280149,169351.0,-0.676143,1.126366,-2.213700,0.468308,-1.120541,-0.003346,-2.234739,1.210158,-0.652250,...,0.247968,0.751826,0.834108,0.190944,0.032070,-0.739695,0.471111,0.385107,0.194361,77.89
281144,169966.0,-3.113832,0.585864,-5.399730,1.817092,-0.840618,-2.943548,-2.208002,1.058733,-1.632333,...,0.306271,0.583276,-0.269209,-0.456108,-0.183659,-0.328168,0.606116,0.884876,-0.253700,245.00


In [26]:
y

148862    0
242035    0
11893     0
15403     0
119539    0
         ..
279863    1
280143    1
280149    1
281144    1
281674    1
Name: Class, Length: 984, dtype: int64

## Splitting the Dataset into Train and Test Set

In [27]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

In [28]:
X_train

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount
51076,44781.0,0.985292,-0.830089,0.884408,0.154661,-1.182220,0.134619,-0.691562,0.253159,1.115972,...,0.092944,-0.151564,-0.502946,-0.054900,0.048391,0.075525,0.938123,-0.064902,0.017014,115.00
72075,54555.0,-0.359481,0.840725,1.494872,-0.071883,0.182615,-0.295729,0.668200,0.053743,-0.326617,...,0.086969,-0.234139,-0.507943,0.019473,0.135883,-0.359467,0.047320,0.084075,-0.138646,9.99
56703,47545.0,1.176716,0.557091,-0.490800,0.756424,0.249192,-0.781871,0.228750,-0.040840,-0.432111,...,-0.102772,-0.062166,-0.128168,-0.040176,0.110040,0.437891,0.368809,-0.018287,0.031173,0.76
150684,93888.0,-10.040631,6.139183,-12.972972,7.740555,-8.684705,-3.837429,-11.907702,5.833273,-5.731054,...,-0.082275,2.823431,1.153005,-0.567343,0.843012,0.549938,0.113892,-0.307375,0.061631,1.00
6882,8808.0,-4.617217,1.695694,-3.114372,4.328199,-1.873257,-0.989908,-4.577265,0.472216,0.472017,...,-0.039046,0.481830,0.146023,0.117039,-0.217565,-0.138776,-0.424453,-1.002041,0.890780,1.10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39183,39729.0,-0.964567,-1.643541,-0.187727,1.158253,-2.458336,0.852222,2.785163,-0.303609,0.940006,...,1.784449,0.447180,0.536204,1.634061,0.203839,0.218749,-0.221886,-0.308555,-0.164500,776.83
124079,77167.0,-0.600042,0.991636,1.475234,1.219278,-0.021259,0.288774,0.521252,0.238261,-0.373411,...,0.056139,0.032559,0.349684,-0.126904,0.213338,-0.189138,-0.262659,0.295254,0.189485,25.77
143335,85285.0,-6.713407,3.921104,-9.746678,5.148263,-5.151563,-2.099389,-5.937767,3.578780,-4.684952,...,0.135711,0.954272,-0.451086,0.127214,-0.339450,0.394096,1.075295,1.649906,-0.394905,252.92
260772,159719.0,0.112084,0.653549,0.034644,0.142639,0.513051,-1.942439,1.243326,-0.602037,-0.045956,...,0.072108,0.351159,1.200959,-0.009702,0.857300,-0.507096,-0.244059,-0.035774,-0.075245,42.81


In [29]:
X_test

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount
230076,146179.0,-0.067672,4.251181,-6.540388,7.283657,0.513541,-2.635066,-1.865911,0.780272,-3.868248,...,0.519404,0.415437,-0.469938,0.007128,-0.388147,-0.493398,0.466468,0.566370,0.262990,0.77
262826,160665.0,-0.417340,4.700055,-7.521767,7.671884,0.260821,-2.646693,-2.854432,0.958783,-4.588536,...,0.832035,0.622200,-0.437708,-0.090358,-0.742802,-0.312361,0.502575,0.821390,0.372379,0.77
82146,59262.0,-1.686325,-0.346521,2.654900,0.500239,-0.037830,0.954406,-0.231928,0.565852,-0.141528,...,-0.071708,0.409950,1.202793,0.168883,-0.297814,0.197980,-0.174568,0.053898,0.103683,108.30
30314,35866.0,-2.044489,3.368306,-3.937111,5.623120,-3.079232,-1.253474,-5.778880,1.707428,-4.467103,...,1.112028,1.483594,0.834311,-0.148486,0.001669,-0.038996,0.389526,1.300236,0.549940,7.61
132133,79867.0,-0.808640,1.072073,1.664690,-0.691667,0.711321,0.008134,0.831335,0.144671,-1.027914,...,-0.003095,-0.163695,-0.552942,-0.210904,-0.335261,0.196877,0.039012,0.008660,0.058008,9.99
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
118681,75172.0,1.000749,-0.146698,1.256358,1.177664,-0.865325,0.236922,-0.679140,0.327079,0.276361,...,-0.102815,0.213098,0.534712,0.008863,0.198004,0.175440,-0.363510,0.064216,0.034536,45.00
6472,7740.0,1.023874,2.001485,-4.769752,3.819195,-1.271754,-1.734662,-3.059245,0.889805,0.415382,...,0.204138,0.343283,-0.054196,0.709654,-0.372216,-2.032068,0.366778,0.395171,0.020206,1.00
163165,115706.0,0.042798,1.015239,0.290874,2.024714,0.888001,-0.571756,1.265778,-0.545343,-0.689023,...,0.144699,0.107245,0.728123,-0.249438,0.011154,-0.122129,-0.127560,-0.069335,-0.045601,7.80
52466,45463.0,-1.476893,2.122314,-1.229470,1.201849,-0.343264,-1.317704,-1.528142,-0.620953,-1.213040,...,0.276893,1.186036,-0.040215,-0.238930,0.110144,0.045418,-0.569232,0.481019,-0.047555,1.00


## Model Training

We are using Logistic Regression Model

In [30]:
model = LogisticRegression(max_iter=200)

The number of maximum iterations has been increased to 200, as the model was not converging with the default value(100).

In [31]:
model.fit(X_train, y_train)

## Model Evaluation

We are evalutaing the model based on Accuracy Score

In [32]:
train_predictions = model.predict(X_train)

In [33]:
print(accuracy_score(train_predictions, y_train))

0.9275730622617535


In [34]:
predictions = model.predict(X_test)

In [35]:
print(accuracy_score(y_test, predictions))

0.9644670050761421


The training accuracy is similar to the test accuracy, suggesting that the model is not overfitting.