<a href="https://colab.research.google.com/github/vimesh630/Revenue_Forecasting/blob/main/Feature_Engineering_for_Revenue_Forecasting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1. Import Required Libraries

In [1]:
import pandas as pd
import numpy as np
import os

# Colab-only step: mount Google Drive so the /content/drive/ paths used below resolve.
from google.colab import drive
# force_remount=True requests a fresh mount even if Drive is already mounted in this session.
drive.mount('/content/drive/', force_remount=True)

Mounted at /content/drive/


# 2. Define File Paths

In [2]:
# Drive folder that receives the CSV files written at the end of the notebook.
# exist_ok=True keeps this cell safe to re-run when the folder is already there.
output_dir = "/content/drive/MyDrive/VERGER/Revenue_Forecasting"
os.makedirs(output_dir, exist_ok=True)

# Preprocessed dataset that the feature-engineering steps start from.
input_path = "/content/drive/MyDrive/VERGER/Revenue_Forecasting/preprocessed_forecast_data.csv"

# 3. Load and Check the Dataset

In [3]:
# Read the preprocessed data and order it by Account, then Year, then Month_No,
# so that each account's rows are contiguous and chronological. The index is
# rebuilt so it matches the new row order.
df = (
    pd.read_csv(input_path)
    .sort_values(by=['Account', 'Year', 'Month_No'])
    .reset_index(drop=True)
)

# Engineered columns are added to a separate copy; `df` keeps the loaded columns only.
df_fe = df.copy()

# 4. Feature Engineering

In [4]:
# Every feature below is derived from an account's own Forecast_Revenue history.
# shift/rolling/pct_change/cumsum are positional, so this relies on df_fe being in
# chronological order within each account (the load cell sorts by Account, Year, Month_No).
# NOTE(review): "N months" below assumes one row per account per month — confirm.
revenue_by_account = df_fe.groupby('Account')['Forecast_Revenue']

# Lag features: the same account's revenue 1, 3 and 6 rows earlier.
for lag_months in (1, 3, 6):
    df_fe[f'lag_{lag_months}m_revenue'] = revenue_by_account.shift(lag_months)

# Rolling averages of the previous 3 / 6 rows; shift(1) excludes the current row.
# The window is evaluated inside each account's group, so it can never span two
# accounts. Rows without a full window of history stay NaN.
for window_months in (3, 6):
    df_fe[f'rolling_{window_months}m_avg_revenue'] = revenue_by_account.transform(
        lambda revenue, w=window_months: revenue.shift(1).rolling(window=w).mean()
    )

# Revenue growth rate versus the previous row, as a fraction (0.10 == +10%).
# A previous value of 0 makes the ratio +/-inf; those are stored as NaN so the
# column holds only finite values or missing markers.
df_fe['revenue_growth_pct'] = (
    revenue_by_account.pct_change(periods=1).replace([np.inf, -np.inf], np.nan)
)

# Cumulative revenue per account, including the current row.
# NOTE(review): revenue_growth_pct and cumulative_revenue both use the current row's
# Forecast_Revenue; if that column is the prediction target they leak it — confirm.
df_fe['cumulative_revenue'] = revenue_by_account.cumsum()

# 5. Split the Dataset

In [6]:
# Order rows chronologically across all accounts so the split is by time, not by account.
df_fe = df_fe.sort_values(by=['Year', 'Month_No']).reset_index(drop=True)

# Aim for roughly the first 80% of rows as training data.
train_fraction = 0.8
cutoff_index = int(len(df_fe) * train_fraction)

# A raw row-count cutoff can land in the middle of a (Year, Month_No) period, which
# would put rows from the same month in both train and test. Move the cutoff back to
# the first row of that period so the whole boundary month goes to the test set.
if 0 < cutoff_index < len(df_fe):
    boundary_year = df_fe['Year'].iloc[cutoff_index]
    boundary_month = df_fe['Month_No'].iloc[cutoff_index]
    in_boundary_period = (
        (df_fe['Year'] == boundary_year) & (df_fe['Month_No'] == boundary_month)
    ).to_numpy()
    # Rows are sorted by period, so the first True is where the boundary month starts.
    period_start = int(in_boundary_period.argmax())
    # period_start == 0 would leave no training rows; keep the raw cutoff in that case.
    if in_boundary_period.any() and period_start > 0:
        cutoff_index = period_start

train_df = df_fe.iloc[:cutoff_index].copy()
test_df = df_fe.iloc[cutoff_index:].copy()

# 6. Save Files

In [7]:
# Destination CSVs for the full, train and test frames, all inside output_dir.
full_output_path = os.path.join(output_dir, "full_feature_engineered_forecast_data.csv")
train_output_path = os.path.join(output_dir, "train_feature_engineered_forecast_data.csv")
test_output_path = os.path.join(output_dir, "test_feature_engineered_forecast_data.csv")

# Write each frame without its index, in the order full -> train -> test.
for frame, destination in (
    (df_fe, full_output_path),
    (train_df, train_output_path),
    (test_df, test_output_path),
):
    frame.to_csv(destination, index=False)

# Report where each file went and how large the two splits are.
print(f"✅ Feature engineering & time-based split complete!")
print(f"Full dataset saved to: {full_output_path}")
print(f"Train set shape: {train_df.shape}, saved to: {train_output_path}")
print(f"Test set shape: {test_df.shape}, saved to: {test_output_path}")

✅ Feature engineering & time-based split complete!
Full dataset saved to: /content/drive/MyDrive/VERGER/Revenue_Forecasting/full_feature_engineered_forecast_data.csv
Train set shape: (1042, 26), saved to: /content/drive/MyDrive/VERGER/Revenue_Forecasting/train_feature_engineered_forecast_data.csv
Test set shape: (261, 26), saved to: /content/drive/MyDrive/VERGER/Revenue_Forecasting/test_feature_engineered_forecast_data.csv
