<a href="https://colab.research.google.com/github/vimesh630/Revenue_Forecasting/blob/main/Feature_Engineering_for_Revenue_Forecasting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Mount Google Drive

In [16]:
# Colab-specific helper module used to attach Google Drive to this runtime.
from google.colab import drive
# Mount Drive at /content/drive; the input/output paths used later in this
# notebook live under /content/drive/MyDrive.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Import Required Libraries

In [17]:
import pandas as pd
import numpy as np
import os

# Define File Paths

In [18]:
# Folder on the mounted Drive that receives the engineered train/test CSVs.
# Created up front (no error if it already exists) so later writes cannot
# fail on a missing directory.
output_dir = "/content/drive/MyDrive/VERGER/Revenue_Forecasting"
os.makedirs(output_dir, exist_ok=True)

# Preprocessed forecast data that serves as the input to feature engineering.
input_file_path = "/content/drive/MyDrive/VERGER/Revenue_Forecasting/preprocessed_forecast_data.csv"

# Load Dataset

In [19]:
# Read the preprocessed forecast data from the configured input path.
df = pd.read_csv(input_file_path)

# Build a first-of-month timestamp from the Year and Month columns.
# Year is normalised to an integer string first, so parsing works whether the
# CSV column was read as int (2023) or float (2023.0); the previous format
# string hard-coded the ".0" suffix and only parsed float-rendered years.
# Month is parsed with %B, i.e. it is expected to hold full month names.
year_str = pd.to_numeric(df['Year']).astype(int).astype(str)
month_str = df['Month'].astype(str)
df['date'] = pd.to_datetime(year_str + '-' + month_str + '-01', format='%Y-%B-%d')

# Chronological order is required by the lag/rolling features and by the
# positional train/test split further down.
df = df.sort_values(by='date')

# Feature Engineering Function

In [20]:
def add_features(data, lags=(1, 2, 3), roll_windows=(3, 6)):
    """Add lag, rolling, calendar and one-hot features to the forecast data.

    For each of ``Forecast_Revenue`` and ``Forecast_Quantity`` the function
    adds, separately within every (Account, Product, Type) group:

    * ``<col>_lag_<k>``        -- the value ``k`` rows earlier in the group;
    * ``<col>_roll_mean_<w>``  -- mean of the ``w`` previous values;
    * ``<col>_roll_std_<w>``   -- standard deviation of the ``w`` previous values.

    Rolling statistics are computed on the series shifted by one row, so the
    current row's own value never contributes to its features. Rows without
    enough history in their group receive NaN.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a datetime ``date`` column plus ``Account``, ``Product``,
        ``Type``, ``Forecast_Revenue`` and ``Forecast_Quantity``. The input
        frame is not modified.
    lags : iterable of int, optional
        Lag distances to generate. Defaults to ``(1, 2, 3)``.
    roll_windows : iterable of int, optional
        Rolling window lengths to generate. Defaults to ``(3, 6)``.

    Returns
    -------
    pandas.DataFrame
        New frame with a fresh 0..n-1 index, ``date`` as the first column,
        the generated features, ``month``/``quarter``/``year`` columns, and
        ``Account``/``Product``/``Type`` replaced by one-hot columns (first
        level of each dropped).
    """
    group_cols = ['Account', 'Product', 'Type']

    # A stable sort keeps the caller's order among rows sharing a date while
    # guaranteeing that "previous row in the group" means "earlier in time".
    # The fresh unique index lets per-group results align back row-for-row;
    # the earlier version computed rolling stats on the ungrouped series and
    # then assigned them with a mismatched index, yielding all-NaN columns.
    data = data.sort_values('date', kind='stable').reset_index(drop=True)

    features = {}
    for col in ['Forecast_Revenue', 'Forecast_Quantity']:
        per_group = data.groupby(group_cols)[col]
        for lag in lags:
            features[f'{col}_lag_{lag}'] = per_group.shift(lag)
        for window in roll_windows:
            # shift(1) and rolling() both run inside the group, so a window
            # never mixes values from different Account/Product/Type series.
            features[f'{col}_roll_mean_{window}'] = per_group.transform(
                lambda s, w=window: s.shift(1).rolling(window=w).mean()
            )
            features[f'{col}_roll_std_{window}'] = per_group.transform(
                lambda s, w=window: s.shift(1).rolling(window=w).std()
            )
    data = data.assign(**features)

    # Keep the established output layout: 'date' first, then everything else.
    ordered_cols = ['date'] + [name for name in data.columns if name != 'date']
    data = data[ordered_cols].assign(
        month=data['date'].dt.month,
        quarter=data['date'].dt.quarter,
        year=data['date'].dt.year,
    )

    # One-hot encode the grouping keys; drop_first avoids a redundant column.
    data = pd.get_dummies(data, columns=group_cols, drop_first=True)

    return data

# Apply Feature Engineering

In [21]:
# Build the engineered frame, discard every row that contains a missing value
# (e.g. rows lacking enough history for their lag/rolling features), and
# renumber the surviving rows from zero.
df_fe = add_features(df).dropna().reset_index(drop=True)

# Split the Dataset

In [None]:
# === Split into Train and Test Sets (80/20) ===
# Positional split: the first 80% of rows of df_fe become the training set
# and the remaining rows the test set.
cutoff_index = int(0.8 * len(df_fe))
train_df, test_df = (
    df_fe.iloc[:cutoff_index].copy(),
    df_fe.iloc[cutoff_index:].copy(),
)

# Save the Dataset

In [22]:
# Resolve both destination paths inside the output directory first, then
# write each split as CSV without the row index.
train_path = os.path.join(output_dir, "train_feature_engineered_forecast_data.csv")
test_path = os.path.join(output_dir, "test_feature_engineered_forecast_data.csv")

train_df.to_csv(train_path, index=False)
test_df.to_csv(test_path, index=False)