<a href="https://colab.research.google.com/github/vimesh630/Revenue_Forecasting/blob/main/New_XGBoost.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#1. Import Libraries and Mount Google Drive

In [71]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import xgboost as xgb
from google.colab import drive

drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


#2. Load Dataset

In [72]:
input_file_path = "/content/drive/MyDrive/VERGER/Revenue_Forecasting/forecasting_data.csv"
df = pd.read_csv(input_file_path)

# Clean and ensure correct dtypes
df = df.dropna(subset=['Year', 'Month_No'])
df['Year'] = df['Year'].astype(int)
df['Month_No'] = df['Month_No'].astype(int)
df['Revenue'] = pd.to_numeric(df['Revenue'], errors='coerce').fillna(0)
df['Quantity'] = pd.to_numeric(df['Quantity'], errors='coerce').fillna(0)

# Create date column and sort
df['Date'] = pd.to_datetime(df['Year'].astype(str) + '-' + df['Month_No'].astype(str) + '-01')
df = df.sort_values(['Account','Product','Type','Date']).reset_index(drop=True)

#3. Encode Categorical Variables

In [73]:
label_encoders = {}
for col in ['Account','Product','Type']:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le   # keep separate encoders

#4. Create lag Features

In [74]:
def create_lags(group, lags=[1,2,3]):
    group = group.sort_values('Date')
    for lag in lags:
        group[f'Revenue_lag{lag}'] = group['Revenue'].shift(lag).fillna(0)
        group[f'Quantity_lag{lag}'] = group['Quantity'].shift(lag).fillna(0)
    return group

df = df.groupby(['Account','Product','Type']).apply(create_lags).reset_index(drop=True)

  df = df.groupby(['Account','Product','Type']).apply(create_lags).reset_index(drop=True)


#5. Time Features

In [75]:
df['Month_Sin'] = np.sin(2*np.pi*df['Month_No']/12)
df['Month_Cos'] = np.cos(2*np.pi*df['Month_No']/12)

feature_cols = [
    'Account','Product','Type','Month_No','Month_Sin','Month_Cos',
    'Revenue_lag1','Revenue_lag2','Revenue_lag3',
    'Quantity_lag1','Quantity_lag2','Quantity_lag3'
]

X = df[feature_cols]
y_rev = df['Revenue']
y_qty = df['Quantity']

#6. Test-Train Split

In [76]:
X_train, X_test, y_train_rev, y_test_rev, y_train_qty, y_test_qty = train_test_split(
    X, y_rev, y_qty, test_size=0.2, random_state=42
)

#7. Train Models

In [77]:
xgb_params = {
    'objective': 'reg:squarederror',
    'n_estimators': 500,
    'learning_rate': 0.05,
    'max_depth': 6,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'random_state': 42
}

# Revenue model
model_rev = xgb.XGBRegressor(**xgb_params)
model_rev.fit(X_train, y_train_rev)

# Quantity model
model_qty = xgb.XGBRegressor(**xgb_params)
model_qty.fit(X_train, y_train_qty)

#8. Evaluation

In [78]:
def evaluate_model(model, X_test, y_test, target_name):
    y_pred = model.predict(X_test)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    print(f"{target_name} -> RMSE: {rmse:.2f}, MAE: {mae:.2f}, R²: {r2:.4f}")

evaluate_model(model_rev, X_test, y_test_rev, 'Revenue')
evaluate_model(model_qty, X_test, y_test_qty, 'Quantity')

Revenue -> RMSE: 33035.91, MAE: 15672.99, R²: 0.4866
Quantity -> RMSE: 390.95, MAE: 212.91, R²: 0.4828


#9. Forecast Function

In [82]:
def forecast_account_month(account_name, year, month):
    """
    Forecasts revenue & quantity for all Product-Type combinations
    under a given account for a specific year/month.
    """
    # All product-type combinations for this account
    account_id = label_encoders['Account'].transform([account_name])[0]
    combos = df[df['Account']==account_id][['Product','Type']].drop_duplicates()

    results = []
    for _, row in combos.iterrows():
        prod_id = row['Product']
        type_id = row['Type']
        prod_name = label_encoders['Product'].inverse_transform([prod_id])[0]
        type_name = label_encoders['Type'].inverse_transform([type_id])[0]

        res = forecast(account_name, prod_name, type_name, year, month)
        results.append(res)

    return pd.DataFrame(results)


# Forecast all orders for RBUS in Sept 2025
forecast_table = forecast_account_month("RBUS", 2025, 9)
print(forecast_table)

    Year Month Account                        Product          Type  \
0   2025    09    RBUS               BLACK PEPPER OIL  Conventional   
1   2025    09    RBUS               BLACK PEPPER OIL       Organic   
2   2025    09    RBUS              CINNAMON BARK OIL  Conventional   
3   2025    09    RBUS              CINNAMON BARK OIL       Organic   
4   2025    09    RBUS  CINNAMON BARK OIL (SRI LANKA)  Conventional   
5   2025    09    RBUS    CINNAMON BARK OIL SRI LANKA  Conventional   
6   2025    09    RBUS   CINNAMON BARK OIL(SRI LANKA)  Conventional   
7   2025    09    RBUS              CINNAMON LEAF OIL  Conventional   
8   2025    09    RBUS              CINNAMON LEAF OIL       Organic   
9   2025    09    RBUS                 CITRONELLA OIL  Conventional   
10  2025    09    RBUS                  CLOVE BUD OIL  Conventional   
11  2025    09    RBUS                  CLOVE BUD OIL       Organic   
12  2025    09    RBUS    CLOVE BUD OIL(100% ORGANIC)       Organic   
13  20