## Feature Engineering
Created additional features to enhance model performance:
- **Transaction Frequency:** Tracks the number of transactions over a specific period.
- **Time-related Features:** Includes features like the hour of the day and day of the week.
- **IP-based Geolocation Information:** Extracts geolocation data based on IP addresses to identify potential fraud patterns.


In [1]:
import os
import sys

sys.path.insert(0,os.path.dirname(os.getcwd()))

In [2]:
from scripts import featureEnginerring

In [3]:
import pandas as pd
fraudData=pd.read_csv("../Data/Merged_cleanData.csv")

In [4]:
feature_engineering=featureEnginerring.FeatureEngineer(fraudData)

In [5]:
feature_engineering.create_time_features()

In [6]:
feature_engineering.transaction_frequency()

In [7]:
feature_engineering.normalize_and_scale()

In [8]:
feature_engineering.encode_categorical_features()

In [None]:
featuredData=feature_engineering.get_feature_data()

In [10]:
featuredData.head()

Unnamed: 0.1,Unnamed: 0,purchase_value,age,ip_address,class,signup_hour,purchase_hour,signup_day,purchase_day,hour_of_day,day_of_week,country_encoded,transaction_count,source_Direct,source_SEO,browser_FireFox,browser_IE,browser_Opera,browser_Safari,sex_M
0,0,-0.160088,0.681533,732758368,0,22,2,1,5,2,5,0.097865,0.0,False,True,False,False,False,False,True
1,1,-1.142743,2.306556,350311387,0,20,1,6,0,1,0,0.095626,0.0,False,False,False,False,False,False,False
2,2,-1.197335,2.306556,2621473820,1,18,18,3,3,18,3,0.095626,0.0,False,True,False,False,True,False,True
4,4,0.112871,1.377971,415583117,0,7,18,1,2,18,2,0.095626,0.0,False,False,False,False,False,True,True
5,5,0.276647,-1.756001,2809315199,0,6,8,3,3,8,3,0.116975,0.0,False,False,False,False,False,False,True


In [11]:
featuredData.dtypes

Unnamed: 0             int64
purchase_value       float64
age                  float64
ip_address             int64
class                  int64
signup_hour            int64
purchase_hour          int64
signup_day             int64
purchase_day           int64
hour_of_day            int32
day_of_week            int32
country_encoded      float64
transaction_count    float64
source_Direct           bool
source_SEO              bool
browser_FireFox         bool
browser_IE              bool
browser_Opera           bool
browser_Safari          bool
sex_M                   bool
dtype: object

In [12]:
featuredData.isnull()

Unnamed: 0.1,Unnamed: 0,purchase_value,age,ip_address,class,signup_hour,purchase_hour,signup_day,purchase_day,hour_of_day,day_of_week,country_encoded,transaction_count,source_Direct,source_SEO,browser_FireFox,browser_IE,browser_Opera,browser_Safari,sex_M
0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
5,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
151106,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
151107,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
151108,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
151109,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [13]:
featuredData.to_csv("../Data/featuredData.csv")