In [10]:
import pandas as pd
from prophet import Prophet
import joblib
import os


In [11]:
# Load the raw sales extract and normalise it in one pass:
#   * fix the misspelled 'SerailNum' header,
#   * parse 'weekend_date' as day-first dates (unparseable values become NaT),
#   * discard rows whose date could not be parsed.
df = (
    pd.read_csv("Assessment-2-Associate-DS(in).csv")
    .rename(columns={'SerailNum': 'SerialNum'})
    .assign(
        weekend_date=lambda frame: pd.to_datetime(
            frame['weekend_date'], dayfirst=True, errors='coerce'
        )
    )
    .dropna(subset=['weekend_date'])
)


In [12]:
# --- Configuration -----------------------------------------------------------
MODEL_DIR = "saved_models"
VALID_START = pd.Timestamp("2024-06-01")  # first day of the hold-out window
VALID_END = pd.Timestamp("2024-08-31")    # last day of the hold-out window
FORECAST_WEEKS = 13                       # weekly steps forecast after VALID_END
MIN_FIT_ROWS = 2                          # Prophet refuses to fit on fewer rows


def _new_model():
    """Return an unfitted Prophet model with this notebook's settings.

    Weekly seasonality is disabled: with one observation per week every point
    falls on the same weekday, so a day-of-week effect cannot be estimated and
    would only add noise when predicting.
    """
    return Prophet(weekly_seasonality=False, yearly_seasonality=True)


def _monthly_accuracy(scored):
    """Compute 1 - sum(|yhat - y|) / sum(y) for each calendar month.

    Parameters
    ----------
    scored : DataFrame with columns 'ds' (datetime), 'y' and 'yhat'.

    Returns
    -------
    Series indexed by month number. A month whose actuals sum to zero yields
    NaN instead of dividing by zero.
    """
    month = scored['ds'].dt.month
    abs_error = (scored['yhat'] - scored['y']).abs().groupby(month).sum()
    actual = scored['y'].groupby(month).sum()
    return 1 - abs_error / actual.where(actual != 0)


os.makedirs(MODEL_DIR, exist_ok=True)
forecast_results = []
accuracy_results = []

for serial in df['SerialNum'].unique():
    print(f"\n🔧 Processing SerialNum: {serial}")

    # Prophet expects the columns 'ds' (date) and 'y' (target).
    product_df = (
        df.loc[df['SerialNum'] == serial, ['weekend_date', 'quantity']]
        .rename(columns={'weekend_date': 'ds', 'quantity': 'y'})
        .dropna(subset=['y'])
        .sort_values('ds')
    )

    train = product_df[product_df['ds'] < VALID_START]
    valid = product_df[(product_df['ds'] >= VALID_START) & (product_df['ds'] <= VALID_END)]
    history = product_df[product_df['ds'] <= VALID_END]  # train + validation

    # ---- Validation: fit on pre-window data, score on the hold-out window ----
    if len(train) >= MIN_FIT_ROWS and not valid.empty:
        valid_model = _new_model().fit(train)

        # Predict on the actual validation dates rather than on a generated
        # 'W' (Sunday-anchored) grid, which only lines up with the data when
        # weekend_date happens to fall on a Sunday.
        valid_pred = valid_model.predict(valid[['ds']].drop_duplicates())[['ds', 'yhat']]
        valid_forecast = valid.merge(valid_pred, on='ds', how='inner')

        for month, acc in _monthly_accuracy(valid_forecast).items():
            accuracy_results.append({'SerialNum': serial, 'Month': month, 'Accuracy': round(acc, 4)})
    else:
        print(f"⚠️ SerialNum {serial}: not enough data to validate, skipping accuracy")

    # ---- Forecast: refit on everything up to VALID_END, then look ahead ------
    if len(history) < MIN_FIT_ROWS:
        print(f"⚠️ SerialNum {serial}: fewer than {MIN_FIT_ROWS} rows, skipping forecast")
        continue

    # A fitted Prophet object cannot be refit, so build a fresh one. Fitting on
    # the full history makes the horizon start after the validation window
    # instead of after the last training date.
    model = _new_model().fit(history)

    # Step in whole weeks from the last observed date so forecast dates keep
    # the data's weekday, and skip ahead so the first date is after VALID_END
    # even when this product's history stops early.
    last_date = history['ds'].max()
    weeks_to_start = (VALID_END - last_date).days // 7 + 1
    future_3mo = pd.DataFrame({
        'ds': pd.date_range(
            start=last_date + pd.Timedelta(weeks=weeks_to_start),
            periods=FORECAST_WEEKS,
            freq='7D',
        )
    })

    forecast_3mo = model.predict(future_3mo)[['ds', 'yhat']].rename(
        columns={'ds': 'weekend_date', 'yhat': 'forecast_quantity'}
    )
    forecast_3mo['SerialNum'] = serial
    forecast_results.append(forecast_3mo)

    # Persist the model that produced the forecast.
    # NOTE(review): Prophet's documentation recommends
    # prophet.serialize.model_to_json over pickling for portable storage;
    # joblib is kept here so the existing .pkl artefact name is unchanged.
    joblib.dump(model, f"{MODEL_DIR}/prophet_serial_{serial}.pkl")



🔧 Processing SerialNum: 1


18:33:42 - cmdstanpy - INFO - Chain [1] start processing
18:33:42 - cmdstanpy - INFO - Chain [1] done processing
18:33:43 - cmdstanpy - INFO - Chain [1] start processing



🔧 Processing SerialNum: 5


18:33:43 - cmdstanpy - INFO - Chain [1] done processing
18:33:43 - cmdstanpy - INFO - Chain [1] start processing



🔧 Processing SerialNum: 2


18:33:43 - cmdstanpy - INFO - Chain [1] done processing
18:33:44 - cmdstanpy - INFO - Chain [1] start processing



🔧 Processing SerialNum: 4


18:33:44 - cmdstanpy - INFO - Chain [1] done processing


In [13]:
# Combine the per-product results and write them to CSV.
# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# with the expected columns when no product produced a forecast.
forecast_columns = ['weekend_date', 'forecast_quantity', 'SerialNum']
if forecast_results:
    forecast_df = pd.concat(forecast_results, ignore_index=True)
else:
    forecast_df = pd.DataFrame(columns=forecast_columns)

# Passing the columns explicitly keeps the CSV header present even when no
# accuracy rows were collected.
accuracy_df = pd.DataFrame(accuracy_results, columns=['SerialNum', 'Month', 'Accuracy'])

forecast_df.to_csv("forecast_sept_nov_all_products.csv", index=False)
accuracy_df.to_csv("monthly_accuracy_jun_aug_all_products.csv", index=False)


## 🧠 Modeling Summary
- Prophet model trained per `SerialNum` on historical weekly sales.
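- Validation performed on Jun–Aug 2024 using a monthly accuracy score, $1 - \sum|\hat{y} - y| \,/\, \sum y$ (one minus the weighted absolute percentage error), computed per product and month.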
- Forecasts generated for Sept–Nov 2024.
- Models saved for reproducibility (`.pkl` per product).
