# **Fraud detection dataset cleaning using pandas**

In [None]:

import pandas as pd
# Load the dataset to be cleaned.
df = pd.read_csv('/content/Fraud_Detection_Dataset_Cleaned.csv')

# 'Unnamed: 0' is the name pandas gives a header-less first column, usually a
# row index written out by an earlier to_csv() call. Such a column normally
# differs on every row, which would keep a whole-row drop_duplicates() from
# ever matching two records, so remove it up front. errors='ignore' makes
# this a no-op when the column is absent.
df = df.drop(columns=['Unnamed: 0'], errors='ignore')

# Missing-value count per column (shown as the cell output).
df.isnull().sum()



Unnamed: 0,0
Transaction_ID,0
User_ID,0
Transaction_Amount,0
Transaction_Type,0
Time_of_Transaction,0
Device_Used,0
Location,0
Previous_Fraudulent_Transactions,0
Account_Age,0
Number_of_Transactions_Last_24H,0


## **Removing Duplicates**

In [None]:
# Remove fully identical rows, keeping the first occurrence of each.
df = df.drop_duplicates(keep='first')

# Sanity check: select every row that still has a twin. An empty frame means
# no duplicates are left.
df.loc[df.duplicated(keep=False)]

Unnamed: 0,Transaction_ID,User_ID,Transaction_Amount,Transaction_Type,Time_of_Transaction,Device_Used,Location,Previous_Fraudulent_Transactions,Account_Age,Number_of_Transactions_Last_24H,Payment_Method,Fraudulent


## **Handling Nulls**

## Dropping rows with missing values in critical columns

In [None]:
# Capture the per-column null counts before any rows are removed. A bare
# expression is only displayed when it is the last statement of a cell, so
# the counts are stored here and echoed at the end.
nulls_before_drop = df.isnull().sum()

# Rows without a Transaction_Amount are discarded rather than imputed.
df = df.dropna(subset=['Transaction_Amount'])

# Cell output: null counts as they were before the drop.
nulls_before_drop

## Filling missing values in non-critical columns

In [None]:
# Time_of_Transaction: fill gaps with the most frequent value.
# mode() returns a Series, so [0] takes its first entry.
most_common_time = df['Time_of_Transaction'].mode()[0]
df['Time_of_Transaction'] = df['Time_of_Transaction'].fillna(most_common_time)

# The remaining three columns get an explicit placeholder label instead.
for column in ('Device_Used', 'Location', 'Payment_Method'):
    df[column] = df[column].fillna('Unknown')

# Re-check the null count of every column after filling.
df.isna().sum()

Unnamed: 0,0
Transaction_ID,0
User_ID,0
Transaction_Amount,0
Transaction_Type,0
Time_of_Transaction,0
Device_Used,0
Location,0
Previous_Fraudulent_Transactions,0
Account_Age,0
Number_of_Transactions_Last_24H,0


# Data Type Validation/Conversion

In [None]:
# Print each column's dtype as it stands before the cast below.
print(df.dtypes)

# Cast Time_of_Transaction to an integer dtype.
time_column = 'Time_of_Transaction'
df[time_column] = df[time_column].astype(int)


Transaction_ID                       object
User_ID                               int64
Transaction_Amount                  float64
Transaction_Type                     object
Time_of_Transaction                 float64
Device_Used                          object
Location                             object
Previous_Fraudulent_Transactions      int64
Account_Age                           int64
Number_of_Transactions_Last_24H       int64
Payment_Method                       object
Fraudulent                            int64
dtype: object


# Standardization/Normalization

In [None]:
# Relabel the generic 'Unknown' placeholder in Device_Used as 'Unknown Device'.
device_labels = df['Device_Used'].replace({'Unknown': 'Unknown Device'})
df['Device_Used'] = device_labels

# Print the distinct labels now present in the column.
print(df['Device_Used'].unique())

['Tablet' 'Mobile' 'Desktop' 'Unknown Device']


# Export cleaned dataframe to CSV

In [None]:
# Write the cleaned frame to CSV; index=False keeps the row index out of the file.
# NOTE(review): this filename matches the one passed to read_csv at the top of
# the notebook. If the working directory is /content, the input file is
# overwritten - confirm that this is intended.
output_csv = "Fraud_Detection_Dataset_Cleaned.csv"
df.to_csv(output_csv, index=False)