### HANDLING DATE FEATURES

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Build a one-column frame of consecutive calendar days starting 2023-01-01.
np.random.seed(0)  # seed kept for reproducibility of any later random draws
num_days = 100
# Drive the range length from `num_days` rather than a hard-coded end date,
# so the constant and the data cannot drift apart (100 days ends 2023-04-10).
date_rng = pd.date_range(start='2023-01-01', periods=num_days, freq='D')
df = pd.DataFrame(date_rng, columns=['date'])

In [3]:
# Preview the first five rows of the daily date frame.
df.head(n=5)

Unnamed: 0,date
0,2023-01-01
1,2023-01-02
2,2023-01-03
3,2023-01-04
4,2023-01-05


In [4]:
import datetime

# Split each date into its calendar components, one new column per component.
date_parts = df['date'].dt
for part in ('year', 'month', 'day'):
    df[part] = getattr(date_parts, part)

# Extra derived feature: weekday number (Monday=0, Sunday=6).
df['day_of_week'] = date_parts.dayofweek

In [5]:
# Preview the first five rows, now including the extracted date parts.
df.head(n=5)

Unnamed: 0,date,year,month,day,day_of_week
0,2023-01-01,2023,1,1,6
1,2023-01-02,2023,1,2,0
2,2023-01-03,2023,1,3,1
3,2023-01-04,2023,1,4,2
4,2023-01-05,2023,1,5,3


### HANDLING TIME SERIES DATA

In [6]:
# Build a one-column frame of `num_periods` consecutive hourly timestamps
# starting at 2023-01-01 00:00.
num_periods = 100
# Use the lowercase 'h' alias: the uppercase 'H' alias is deprecated in
# pandas >= 2.2 and emits a FutureWarning there.
date_rng = pd.date_range(start='2023-01-01', periods=num_periods, freq='h')
df = pd.DataFrame(date_rng, columns=['timestamp'])
df.head()


Unnamed: 0,timestamp
0,2023-01-01 00:00:00
1,2023-01-01 01:00:00
2,2023-01-01 02:00:00
3,2023-01-01 03:00:00
4,2023-01-01 04:00:00


In [7]:
# Break each timestamp into calendar (year/month/day) and clock
# (hour/minute/second) components, one new column per component.
stamp_parts = df['timestamp'].dt
for field in ('year', 'month', 'day', 'hour', 'minute', 'second'):
    df[field] = getattr(stamp_parts, field)


In [8]:
# Preview the first five rows with the extracted timestamp components.
df.head(n=5)

Unnamed: 0,timestamp,year,month,day,hour,minute,second
0,2023-01-01 00:00:00,2023,1,1,0,0,0
1,2023-01-01 01:00:00,2023,1,1,1,0,0
2,2023-01-01 02:00:00,2023,1,1,2,0,0
3,2023-01-01 03:00:00,2023,1,1,3,0,0
4,2023-01-01 04:00:00,2023,1,1,4,0,0


### HANDLING MIXED FEATURES

In [9]:
# Sample column whose values mix a leading letter with trailing digits.
data = dict(Mixed_Feature=['B123', 'C124', 'A120', 'B125', 'C126'])
df = pd.DataFrame.from_dict(data)
df.head(n=5)

Unnamed: 0,Mixed_Feature
0,B123
1,C124
2,A120
3,B125
4,C126


In [10]:
# Inspect the storage dtype of the raw mixed column.
df.dtypes["Mixed_Feature"]

dtype('O')

In [11]:
# Split the mixed code: the first character becomes the category label,
# and everything after it is parsed as an integer.
mixed_text = df['Mixed_Feature'].str
df['Category'] = mixed_text.get(0)
df['Numerical'] = mixed_text.slice(start=1).astype(int)

In [12]:
# Preview the first five rows with the new Category and Numerical columns.
df.head(n=5)

Unnamed: 0,Mixed_Feature,Category,Numerical
0,B123,B,123
1,C124,C,124
2,A120,A,120
3,B125,B,125
4,C126,C,126


In [13]:
# Inspect the dtype produced by the integer conversion of the numeric part.
df.dtypes["Numerical"]

dtype('int32')