<a href="https://colab.research.google.com/github/vimesh630/Revenue_Forecasting/blob/main/Dataset_Preprocessing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import Libraries

In [None]:
import pandas as pd

# Mount Google Drive

In [None]:
# Colab-only step: attach Google Drive to the runtime's filesystem so that the
# /content/drive/... CSV paths used by this notebook can be read and written.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'  # directory under which Drive is exposed
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Define File Paths, Load and Clean the Data

In [None]:
# Where the raw export is read from and where the processed dataset is written
# at the end of the notebook (both live on the mounted Google Drive).
input_path = "/content/drive/MyDrive/VERGER/Revenue_Forecasting/forecasting_data.csv"
output_path = "/content/drive/MyDrive/VERGER/Revenue_Forecasting/forecasting_data_processed.csv"

# Load the raw CSV and strip stray whitespace from the header names so the
# name-based column lookups below match.
df = pd.read_csv(input_path)
df.columns = df.columns.str.strip()

# Discard rows lacking either date part, then cast both parts to integers in a
# single step (the cast has to come after the drop: NaN cannot become an int).
df = (
    df.dropna(subset=["Year", "Month_No"])
      .astype({"Year": int, "Month_No": int})
)

# Create Period Column

In [None]:
# Build a month-start timestamp for every row from its Year / Month_No pair:
# Year=2024, Month_No=9 -> "2024-09" -> Timestamp("2024-09-01").
# The format is passed explicitly so parsing does not depend on pandas'
# format inference (whose behaviour differs between pandas versions) and any
# value that is not a valid "YYYY-MM" string raises instead of being guessed.
df["Period"] = pd.to_datetime(
    df["Year"].astype(str) + "-" + df["Month_No"].astype(str).str.zfill(2),
    format="%Y-%m",
)

# Sort the Data

In [None]:
# Order rows by account, then product, then time, and renumber the index from
# 0. The lag and rolling features built afterwards are positional within each
# (Account, Product) group, so they rely on this chronological ordering.
df = df.sort_values(
    by=["Account", "Product", "Period"],
    ignore_index=True,
)

# Create Lag Features

In [None]:
lags = [1, 3, 6]  # you can adjust these

# Group once, outside the loop: the grouping does not depend on the lag, so
# rebuilding it for every lag and every column is redundant work.
by_series = df.groupby(["Account", "Product"])
revenue_by_series = by_series["Revenue"]
quantity_by_series = by_series["Quantity"]

# For each lag, add the value observed `lag` rows earlier within the same
# (Account, Product) group. The first `lag` rows of every group have no
# predecessor and are therefore NaN.
# NOTE(review): shift() counts rows, not calendar months. A lag of 1 means
# "previous month" only if every group has exactly one row per consecutive
# month -- confirm against the data, or reindex to a full monthly range first.
for lag in lags:
    df[f"Revenue_Lag_{lag}"] = revenue_by_series.shift(lag)
    df[f"Quantity_Lag_{lag}"] = quantity_by_series.shift(lag)

# Create Rolling Features

In [None]:
# Window sizes are numbers of rows (observations) within a group; they equal
# months only if each group has exactly one row per consecutive month.
windows = [3, 6]

# Group once, outside the loop: the grouping does not depend on the window.
by_series = df.groupby(["Account", "Product"])
revenue_by_series = by_series["Revenue"]
quantity_by_series = by_series["Quantity"]

# For each window, add the mean over the current row and the (window - 1) rows
# before it within the same (Account, Product) group. rolling(window) requires
# a full window by default, so the first (window - 1) rows of a group are NaN.
# NOTE(review): the window includes the current row's own Revenue/Quantity. If
# these columns are later used to predict the same row's value, that leaks the
# target; shifting by one row before rolling would avoid it -- confirm how the
# modelling code consumes these features before changing the values.
for window in windows:
    df[f"Revenue_Rolling_{window}"] = revenue_by_series.transform(
        lambda values, size=window: values.rolling(size).mean()
    )
    df[f"Quantity_Rolling_{window}"] = quantity_by_series.transform(
        lambda values, size=window: values.rolling(size).mean()
    )

# Save the File

In [None]:
# Write the engineered dataset to Drive; the positional index is not stored as
# a column.
df.to_csv(path_or_buf=output_path, index=False)

# Confirm the destination and show the first ten rows as a quick sanity check.
preview = df.head(10)
print(f"✅ Processed dataset saved to: {output_path}", preview, sep="\n")

✅ Processed dataset saved to: /content/drive/MyDrive/VERGER/Revenue_Forecasting/forecasting_data_processed.csv
   Year      Month  Month_No Quarter Account  \
0  2024  September         9      Q3    AEIN   
1  2024       July         7      Q3    AEIN   
2  2023     August         8      Q3    AEIN   
3  2020    October        10      Q4    AFFR   
4  2019     March          3      Q1    AGFR   
5  2019        May         5      Q2    AGFR   
6  2019       June         6      Q2    AGFR   
7  2020   February         2      Q1    AGFR   
8  2020      April         4      Q2    AGFR   
9  2020    October        10      Q4    AGFR   

                              Product          Type  Quantity  Unit_Price  \
0              Pepper Black Oleoresin  Conventional    8000.0        24.0   
1        Pepper Black Oleoresin 40/20  Conventional    1200.0        27.0   
2  Pepper Black Sri Lanka Oleo 40/20   Conventional     300.0       125.0   
3                   CINNAMON LEAF OIL  Conventional 