# TIME SERIES FORECASTING

In [10]:
%pip install pandas numpy statsmodels pmdarima scikit-learn openpyxl

Note: you may need to restart the kernel to use updated packages.


## Import Libraries

In [11]:
import pandas as pd
import numpy as np
from statsmodels.tsa.holtwinters import SimpleExpSmoothing, Holt, ExponentialSmoothing
from statsmodels.tsa.arima.model import ARIMA
from pmdarima import auto_arima
from sklearn.metrics import mean_absolute_percentage_error
import warnings
warnings.filterwarnings('ignore')

## Load and Filter Data 

In [None]:
import pandas as pd

# Read the cleaned workbook, then keep only the rows whose 'Key Figure'
# column equals 'Sales History'. The filtered frame is published as `df`.
sales_workbook = pd.read_excel('Cleaned_Sales_History.xlsx')
is_sales_history = sales_workbook['Key Figure'] == 'Sales History'
df = sales_workbook[is_sales_history]

## Monthly Sales Data

In [13]:
import pandas as pd
import numpy as np

# Reshape the wide sales sheet (`df`: one row per product entry, one column
# per month such as 'Sep-22') into `monthly_df`: a frame indexed by month-end
# date with one column per 'Product ID' and zero-filled gaps.

# Headers that are explicitly excluded from the month columns.
NON_MONTH_COLS = ['2022', '2023', '2024', '2025', '2026', 'Total Qty']

# Candidate month columns contain a '-'. The isinstance guard keeps a
# non-string header (e.g. an int or datetime coming out of Excel) from raising
# TypeError in the `'-' in col` test.
month_cols = [
    col for col in df.columns
    if isinstance(col, str) and '-' in col and col not in NON_MONTH_COLS
]

# Parse each header exactly once instead of once per (row, column) pair; an
# unparseable header is reported a single time and then skipped.
month_end_by_col = {}
for month in month_cols:
    try:
        # Parse date and force to month-end
        month_end_by_col[month] = pd.to_datetime(month, format='%b-%y') + pd.offsets.MonthEnd(0)
    except ValueError as e:
        print(f"Error parsing date for column {month}: {e}")

# Long format: one record per (source row, month), in row-major order.
monthly_data = [
    {'Product ID': row['Product ID'], 'Date': date, 'Sales': row[month]}
    for _, row in df.iterrows()
    for month, date in month_end_by_col.items()
]
if not monthly_data:
    # Fail here with a clear message rather than with a KeyError further down.
    raise ValueError("No monthly sales values found: df is empty or has no parseable '%b-%y' columns.")

monthly_long = pd.DataFrame(monthly_data)

# Several source rows can share the same (Date, Product ID); show them for
# inspection. They are summed by the groupby below.
duplicates = monthly_long[monthly_long.duplicated(subset=['Date', 'Product ID'], keep=False)]
if not duplicates.empty:
    print("Duplicates found:")
    print(duplicates)

monthly_totals = monthly_long.groupby(['Date', 'Product ID'])['Sales'].sum().reset_index()
monthly_wide = monthly_totals.pivot(index='Date', columns='Product ID', values='Sales')

# Use the MonthEnd offset object rather than the 'M' alias: the alias is
# deprecated since pandas 2.2 (renamed 'ME'), while the offset object means
# the same thing on every pandas version.
month_end = pd.offsets.MonthEnd()
date_range = pd.date_range(start=monthly_wide.index.min(), end=monthly_wide.index.max(), freq=month_end)
monthly_df = monthly_wide.reindex(date_range, fill_value=0)

# Assigning `freq` validates the index and raises ValueError on a mismatch, so
# the warning has to live in the except branch (a later `is None` test could
# never fire).
try:
    monthly_df.index.freq = month_end
except ValueError:
    print("Warning: Could not set monthly frequency. Check index for irregularities.")
    print("Index dates:", monthly_df.index)

# Product/month combinations with no data are treated as zero sales.
monthly_df = monthly_df.fillna(0)

Duplicates found:
                                             Product ID       Date  Sales
0     100000093330 - PC Gourmet West Coast Dark Coff... 2022-09-30  25730
1     100000093330 - PC Gourmet West Coast Dark Coff... 2022-10-31  34344
2     100000093330 - PC Gourmet West Coast Dark Coff... 2022-11-30  16956
3     100000093330 - PC Gourmet West Coast Dark Coff... 2022-12-31  16632
4     100000093330 - PC Gourmet West Coast Dark Coff... 2023-01-31  17766
...                                                 ...        ...    ...
1348           100000093485 - USF 100% Colombian 42X2oz 2025-01-31     40
1349           100000093485 - USF 100% Colombian 42X2oz 2025-02-28     32
1350           100000093485 - USF 100% Colombian 42X2oz 2025-03-31     32
1351           100000093485 - USF 100% Colombian 42X2oz 2025-04-30     32
1352           100000093485 - USF 100% Colombian 42X2oz 2025-05-31     32

[1056 rows x 3 columns]


## Train-Test Split

In [14]:
import pandas as pd

# Fit on everything up to and including March 2025; hold out the April and
# May 2025 month-ends for scoring. The horizon is the number of held-out rows.
train_end = '2025-03-31'
test_start, test_end = '2025-04-30', '2025-05-31'

train = monthly_df.loc[:train_end]
test = monthly_df.loc[test_start:test_end]
forecast_horizon = test.shape[0]

## Apply Forecasting Models and Compute MAPE

In [None]:
import pandas as pd
import numpy as np
from statsmodels.tsa.holtwinters import SimpleExpSmoothing, Holt, ExponentialSmoothing
from statsmodels.tsa.arima.model import ARIMA
from pmdarima import auto_arima
from sklearn.metrics import mean_absolute_percentage_error


def _forecast_ses(series, horizon):
    """Simple exponential smoothing: fit on `series`, forecast `horizon` steps."""
    return SimpleExpSmoothing(series).fit().forecast(horizon)


def _forecast_des(series, horizon):
    """Holt (double) exponential smoothing forecast."""
    return Holt(series).fit().forecast(horizon)


def _forecast_tes(series, horizon):
    """Additive-trend, additive-seasonal exponential smoothing with a 12-month season."""
    fitted = ExponentialSmoothing(series, trend='add', seasonal='add', seasonal_periods=12).fit()
    return fitted.forecast(horizon)


def _forecast_ma(series, horizon):
    """Flat forecast equal to the mean of the last 12 observations (plain ndarray)."""
    return series.rolling(window=12).mean().iloc[-1] * np.ones(horizon)


def _forecast_arima(series, horizon):
    """Fixed-order ARIMA(1,1,1) forecast."""
    return ARIMA(series, order=(1, 1, 1)).fit().forecast(horizon)


def _forecast_auto_arima(series, horizon):
    """Seasonal (m=12) auto-selected ARIMA forecast."""
    return auto_arima(series, seasonal=True, m=12, suppress_warnings=True).predict(n_periods=horizon)


# Evaluation order; it also fixes the column order of the MAPE table.
EVAL_FORECASTERS = {
    'SES': _forecast_ses,
    'DES': _forecast_des,
    'TES': _forecast_tes,
    'MA': _forecast_ma,
    'ARIMA': _forecast_arima,
    'Auto ARIMA': _forecast_auto_arima,
}

# results[product][model]   -> MAPE in percent on the hold-out (NaN if the model failed)
# forecasts[product][model] -> the hold-out forecast itself
# fit_errors[(product, model)] -> reason a model failed, kept for inspection
results = {prod: {} for prod in monthly_df.columns}
forecasts = {prod: {} for prod in monthly_df.columns}
fit_errors = {}

# Forecasting
for product in monthly_df.columns:
    train_series = train[product].dropna()
    test_series = test[product].dropna()

    # Skip products with under 12 training points or no sales at all in
    # training; their `results` entry stays an empty dict.
    if len(train_series) < 12 or train_series.sum() == 0:
        continue

    for model_name, make_forecast in EVAL_FORECASTERS.items():
        try:
            model_forecast = make_forecast(train_series, forecast_horizon)
            forecasts[product][model_name] = model_forecast
            # NOTE(review): MAPE divides by the actuals, so zero actuals in the
            # hold-out can produce extremely large scores.
            results[product][model_name] = mean_absolute_percentage_error(test_series, model_forecast) * 100
        except Exception as exc:
            # Best effort per model: a failure is scored NaN and the loop goes
            # on. `Exception` (not a bare except) lets KeyboardInterrupt and
            # SystemExit through, and the reason is recorded, not discarded.
            results[product][model_name] = np.nan
            fit_errors[(product, model_name)] = f"{type(exc).__name__}: {exc}"

if fit_errors:
    print(f"{len(fit_errors)} model run(s) failed and were scored NaN; inspect `fit_errors` for details.")

## Generate Future Predictions

In [16]:
import pandas as pd
import numpy as np
from statsmodels.tsa.holtwinters import SimpleExpSmoothing, Holt, ExponentialSmoothing
from statsmodels.tsa.arima.model import ARIMA
from pmdarima import auto_arima

# Future predictions (Jun 2025 to May 2026)
# The MonthEnd offset object is used instead of the 'M' alias, which is
# deprecated since pandas 2.2 (renamed 'ME'); the resulting dates are the same.
future_months = pd.date_range(start='2025-06-30', end='2026-05-31', freq=pd.offsets.MonthEnd())
# Tie the forecast length to the target index instead of a separate literal 12.
future_horizon = len(future_months)
future_forecasts = {prod: {} for prod in monthly_df.columns}

# One refit-and-forecast recipe per model name used as a key in `results`.
FUTURE_FORECASTERS = {
    'SES': lambda s, h: SimpleExpSmoothing(s).fit().forecast(h),
    'DES': lambda s, h: Holt(s).fit().forecast(h),
    'TES': lambda s, h: ExponentialSmoothing(
        s, trend='add', seasonal='add', seasonal_periods=12
    ).fit().forecast(h),
    'MA': lambda s, h: s.rolling(window=12).mean().iloc[-1] * np.ones(h),
    'ARIMA': lambda s, h: ARIMA(s, order=(1, 1, 1)).fit().forecast(h),
    'Auto ARIMA': lambda s, h: auto_arima(
        s, seasonal=True, m=12, suppress_warnings=True
    ).predict(n_periods=h),
}

for product in monthly_df.columns:
    series = monthly_df[product].dropna()
    if len(series) < 12 or series.sum() == 0:
        continue

    # Keep only models that produced a real score. `results[product]` is empty
    # when the training slice was skipped even though the full series passes
    # the filter above; idxmin() raises on an empty Series, and an all-NaN
    # Series has no meaningful minimum either.
    valid_scores = pd.Series(results[product], dtype=float).dropna()
    if valid_scores.empty:
        continue

    # Lowest hold-out MAPE wins; the winner is refit on the full history.
    best_model = valid_scores.idxmin()
    make_forecast = FUTURE_FORECASTERS.get(best_model)
    if make_forecast is None:
        # Unknown model name: leave this product's entry empty.
        continue

    future_forecasts[product] = make_forecast(series, future_horizon)

## Print Results

In [17]:
import pandas as pd

# MAPE table: one row per product, one column per model.
mape_by_product = pd.DataFrame(results).T
mape_df = mape_by_product.reset_index().rename(columns={'index': 'Product ID'})

# Forecast table: one row per product, one column per future month.
forecast_by_product = pd.DataFrame(future_forecasts, index=future_months).T
future_df = forecast_by_product.reset_index().rename(columns={'index': 'Product ID'})

# Relabel the date columns (everything after 'Product ID') as e.g. 'Jun-2025'.
month_labels = [stamp.strftime('%b-%Y') for stamp in future_df.columns[1:]]
future_df.columns = ['Product ID'] + month_labels

print("MAPE Scores:\n", mape_df)
print("\nFuture Predictions:\n", future_df)

MAPE Scores:
                                            Product ID         SES         DES  \
0   100000093268 - TDL 100% Colombian Dark Medium ...   26.305572   21.105051   
1      100000093270 - TDL French Vanilla Light 6x300g   53.368766   41.570397   
2   100000093271 - TDL Hazelnut Light Medium Roast...   17.279506   15.569626   
3   100000093279 - TDL Original Blend Medium Roast...    2.316286    2.316288   
4   100000093283 - No Name SS OW Medium Roast 100C...    0.005767    2.821440   
5   100000093288 - No Name SS OW Dark Roast 100CT ...    3.947974    4.861865   
6         100000093295 - PC SS West Coast Dark 4x30CT   46.876086   15.199473   
7               100000093300 - PC Great Canadian 72ct   10.733768    9.410525   
8              100000093301 - PC West Coast Dark 72ct   12.370549    2.778045   
9     100000093307 - No Name Dark Roast Coffee 6x925g   18.822847   21.031485   
10         100000093309 - No Name Medium Roast 6x925g    9.888540   12.762807   
11  1000000933

## Save Results

In [18]:
import pandas as pd
import os

# Both CSVs are written to the current working directory.
output_dir = os.getcwd()
mape_file, future_file = (
    os.path.join(output_dir, file_name)
    for file_name in ('mape_scores_monthly.csv', 'future_predictions_monthly.csv')
)

try:
    # Rebuild both tables from `results` / `future_forecasts` before writing.
    mape_by_product = pd.DataFrame(results).T
    mape_df = mape_by_product.reset_index().rename(columns={'index': 'Product ID'})

    forecast_by_product = pd.DataFrame(future_forecasts, index=future_months).T
    future_df = forecast_by_product.reset_index().rename(columns={'index': 'Product ID'})
    month_labels = [stamp.strftime('%b-%Y') for stamp in future_df.columns[1:]]
    future_df.columns = ['Product ID'] + month_labels

    # Write both files first; the confirmations print only if both succeed.
    mape_df.to_csv(mape_file, index=False)
    future_df.to_csv(future_file, index=False)

    print(f"MAPE Scores saved to '{mape_file}'")
    print(f"Future Predictions saved to '{future_file}'")

except Exception as e:
    # Permission problems get a targeted hint; anything else is reported
    # generically. Either way the target directory is printed last.
    if isinstance(e, PermissionError):
        print(f"PermissionError: Could not write to files due to: {e}")
        print("Try closing any programs using the output files (e.g., Excel) or check directory permissions.")
    else:
        print(f"Error saving files: {e}")
    print(f"Attempted to save in: {output_dir}")

MAPE Scores saved to 'd:\Python\time-series-forecasting\time-series-yash\future-predicitons-monthly\Basic\mape_scores_monthly.csv'
Future Predictions saved to 'd:\Python\time-series-forecasting\time-series-yash\future-predicitons-monthly\Basic\future_predictions_monthly.csv'
