In [1]:
import pandas as pd
from datetime import datetime
from sklearn.preprocessing import LabelEncoder
import warnings
# Silence every UserWarning and FutureWarning for the rest of the session.
# No module filter is given, so this applies to warnings from all libraries.
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

In [2]:
# Load train_action_history.csv into a DataFrame (the path is relative to the
# notebook's working directory).
df = pd.read_csv('train_action_history.csv')

In [3]:
# Inspect the column dtypes pandas inferred for the freshly loaded frame.
df.dtypes

customer_code           object
Offer_id                object
Offer_subid             object
batch_id                 int64
product_category        object
product_sub_category    object
send_timestamp          object
open_timestamp          object
dtype: object

In [4]:
# Preview the first rows of the loaded data.
df.head()

Unnamed: 0,customer_code,Offer_id,Offer_subid,batch_id,product_category,product_sub_category,send_timestamp,open_timestamp
0,00199d3467a7191db5bfa4e5f9a62eeb96fb0b602c3ec5...,AC_100044882,VR_200098111,70000078,CC_ACQ_SECURED,SECURED_ACQ,2024-10-22T17:35:27.000Z,
1,001f2abab1bccc25d00bba68fea57a81cab1c76d485515...,AC_100048426,VR_200108485,10000253,SIP / MF,ACQUISITION,2024-08-09T18:20:18.000Z,
2,00298fc11fb6924004c041f141f92c3c74e209d9a737d9...,AC_100052603,VR_200127708,70000074,CC_INORGANIC,EMI,2024-09-20T10:45:20.000Z,2024-09-20T10:45:31.000Z
3,0038a46221c0175fc8938ebc8aef8d0f83b3ac1ad84662...,AC_100047006,VR_200105035,70000072,CC_ACQ_SECURED,SECURED_ACQ,2024-09-06T12:37:16.000Z,
4,00409395a831af7fd41d0ec70a5be3bda13d64cb3e4be9...,AC_100046122,VR_200100845,10000246,RURAL,BALANCE BUILD UP,2024-06-23T11:03:13.000Z,


In [5]:
# Ordered, non-overlapping [start_hour, end_hour) windows used to bucket the
# hour of day. Hours before 9 or from 21 onwards fall into no window.
time_slots = [
    (9, 12), (12, 15), (15, 18), (18, 21)
]

# String layouts accepted by get_time_slot: date and time separated by 'T' with
# a trailing literal 'Z', with or without fractional seconds.
_TIMESTAMP_FORMATS = ('%Y-%m-%dT%H:%M:%S.%fZ', '%Y-%m-%dT%H:%M:%SZ')


def get_time_slot(timestamp):
    """Map a timestamp to a combined weekday / time-of-day slot number.

    The slot is ``weekday * len(time_slots) + (window_index + 1)``, where
    ``weekday`` is ``datetime.weekday()`` (Monday == 0) and ``window_index`` is
    the position of the matching window in ``time_slots``. With the four
    windows defined above this yields values from 1 to 28.

    The hour is taken exactly as written in the timestamp; no timezone
    conversion is applied.

    Parameters
    ----------
    timestamp : str, datetime, or missing value
        Either a string in one of ``_TIMESTAMP_FORMATS`` or a ``datetime``
        instance (``pandas.Timestamp`` is a ``datetime`` subclass).

    Returns
    -------
    int or None
        The slot number, or ``None`` when the value is missing, is not a
        supported type, cannot be parsed, or its hour lies outside every
        window in ``time_slots``.
    """
    if pd.isnull(timestamp):
        return None

    if isinstance(timestamp, datetime):
        dt = timestamp
    elif isinstance(timestamp, str):
        dt = None
        for fmt in _TIMESTAMP_FORMATS:
            try:
                dt = datetime.strptime(timestamp, fmt)
                break
            except ValueError:
                # Layout did not match; try the next accepted layout.
                continue
        if dt is None:
            return None
    else:
        # Unsupported input type: best-effort contract is to report "no slot".
        return None

    # Derive the per-day stride from the window list so that slot numbers stay
    # unique if windows are added or removed.
    slots_per_day = len(time_slots)
    for i, (start, end) in enumerate(time_slots):
        if start <= dt.hour < end:
            return dt.weekday() * slots_per_day + (i + 1)
    return None

# Replace the raw timestamp strings with integer slot codes.
# NOTE: this overwrites the original timestamp columns, so re-run the load
# cell before executing this cell a second time.

# A row without a send time cannot be bucketed at all, so drop it.
df = df.dropna(subset=['send_timestamp'])

# Send times for which get_time_slot returns None are imputed with the most
# frequent slot. The filled Series is assigned back explicitly: calling
# fillna(..., inplace=True) on a column selection is a chained assignment and
# does not update the frame under pandas Copy-on-Write.
send_slot = df['send_timestamp'].apply(get_time_slot)
send_slot = send_slot.fillna(send_slot.mode()[0])

# Open times for which get_time_slot returns None (including missing values)
# are encoded as 0; real slots start at 1, so 0 is unambiguous.
open_slot = df['open_timestamp'].apply(get_time_slot).fillna(0)

df = df.assign(
    send_timestamp=send_slot.astype('int64'),
    open_timestamp=open_slot.astype('int64'),
).rename(columns={'customer_code': 'CUSTOMER_CODE'})

In [6]:
# Remove batch_id; it is not carried into the later encoding or export steps.
df = df.drop(columns=['batch_id'])

In [7]:
# Integer-encode every remaining object-dtype column except the customer key.
exclude_columns = ['CUSTOMER_CODE']
object_columns = [
    name
    for name in df.select_dtypes(include=['object']).columns
    if name not in exclude_columns
]

# One encoder per column, kept so the integer codes can be mapped back to the
# original labels later via the fitted encoder.
label_encoders = {name: LabelEncoder() for name in object_columns}
for col, le in label_encoders.items():
    df[col] = le.fit_transform(df[col])

In [8]:
# Count the missing values left in each column after the steps above.
df.isna().sum()

CUSTOMER_CODE           0
Offer_id                0
Offer_subid             0
product_category        0
product_sub_category    0
send_timestamp          0
open_timestamp          0
dtype: int64

In [9]:
# Preview the first rows of the transformed frame.
df.head()

Unnamed: 0,CUSTOMER_CODE,Offer_id,Offer_subid,product_category,product_sub_category,send_timestamp,open_timestamp
0,00199d3467a7191db5bfa4e5f9a62eeb96fb0b602c3ec5...,72,246,1,64,7,0
1,001f2abab1bccc25d00bba68fea57a81cab1c76d485515...,193,437,26,4,20,0
2,00298fc11fb6924004c041f141f92c3c74e209d9a737d9...,644,1716,4,30,17,17
3,0038a46221c0175fc8938ebc8aef8d0f83b3ac1ad84662...,153,381,1,64,18,0
4,00409395a831af7fd41d0ec70a5be3bda13d64cb3e4be9...,103,281,24,8,25,0


In [10]:
# Write the processed frame to HISTORY.csv, omitting the DataFrame index.
df.to_csv('HISTORY.csv', index=False)