In [1]:
# Step 1: Import Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import LabelEncoder

In [2]:


# Step 2: Load Data
data = pd.read_csv('./sales_df.csv')
data.head()


Unnamed: 0,bike_name,bike_type,date_ordered,quantity
0,TrailBlazer XT,Mountain Bike,2020-12-14,2
1,UrbanCommuter 500,Hybrid Bike,2020-07-26,3
2,Mountain Climber XT,Mountain Bike,2020-10-08,5
3,TrailBlazer XT,Mountain Bike,2019-08-10,8
4,AirMaster 300,BMX,2019-12-20,11


In [3]:

# Step 3: Data Preprocessing
data['date_ordered'] = pd.to_datetime(data['date_ordered'])
data = data.sort_values(['bike_name', 'date_ordered'])  # Sort by product and date

# Step 4: Feature Engineering
# Extract time-based features
data['year'] = data['date_ordered'].dt.year
data['month'] = data['date_ordered'].dt.month
data['day'] = data['date_ordered'].dt.day
data['day_of_week'] = data['date_ordered'].dt.dayofweek

# Lag features for each product's previous quantity
data['quantity_lag1'] = data.groupby('bike_name')['quantity'].shift(1)
data['rolling_avg_7'] = data.groupby('bike_name')['quantity'].rolling(window=7).mean().reset_index(0, drop=True)

# Drop rows with NaN values from lag/rolling features
data = data.dropna()
data.head()


Unnamed: 0,bike_name,bike_type,date_ordered,quantity,year,month,day,day_of_week,quantity_lag1,rolling_avg_7
14850,Speedster Pro,Road Bike,2018-01-06,3,2018,1,6,5,2.0,2.714286
1553,Speedster Pro,Road Bike,2018-01-07,2,2018,1,7,6,3.0,2.714286
4501,Speedster Pro,Road Bike,2018-01-07,2,2018,1,7,6,2.0,2.428571
12960,Speedster Pro,Road Bike,2018-01-07,2,2018,1,7,6,2.0,2.285714
3646,Speedster Pro,Road Bike,2018-01-08,3,2018,1,8,0,2.0,2.285714


In [6]:

# Step 5: Encode Categorical Data (Product)
le = LabelEncoder()
data['product_encoded'] = le.fit_transform(data['bike_name'])
# Step 6: Train-Test Split
data.to_csv('processed_sales_df.csv', index=False)

In [7]:
print(data)


               bike_name    bike_type date_ordered  quantity  year  month  \
14850      Speedster Pro    Road Bike   2018-01-06         3  2018      1   
1553       Speedster Pro    Road Bike   2018-01-07         2  2018      1   
4501       Speedster Pro    Road Bike   2018-01-07         2  2018      1   
12960      Speedster Pro    Road Bike   2018-01-07         2  2018      1   
3646       Speedster Pro    Road Bike   2018-01-08         3  2018      1   
...                  ...          ...          ...       ...   ...    ...   
7444   UrbanCommuter 500  Hybrid Bike   2023-12-25         9  2023     12   
8636   UrbanCommuter 500  Hybrid Bike   2023-12-26        12  2023     12   
1242   UrbanCommuter 500  Hybrid Bike   2023-12-27        13  2023     12   
6539   UrbanCommuter 500  Hybrid Bike   2023-12-28        14  2023     12   
8333   UrbanCommuter 500  Hybrid Bike   2023-12-28        12  2023     12   

       day  day_of_week  quantity_lag1  rolling_avg_7  product_encoded  
14