In [1]:
import pandas as pd
from prophet import Prophet
from sklearn.metrics import mean_squared_error, mean_squared_log_error
import numpy as np

In [2]:
df = pd.read_csv('../../Data/Pharmacy/D4.csv', parse_dates=['date'], dayfirst=True)

In [3]:
df.rename(columns={'date': 'ds', 'looseqty': 'y'}, inplace=True)

In [4]:
unique_products = df['itemname'].unique()
unique_products.size

6053

In [5]:
total_sales_by_product = df.groupby('itemname')['y'].sum().reset_index()

In [6]:
top_10_products = total_sales_by_product.nlargest(10, 'y')['itemname'].tolist()

In [7]:
top_10_products

['PANADOL TAB',
 'LOPRIN 75MG TAB',
 "SURBEX Z TAB(30'S)",
 'GLUCOPHAGE 500MG TAB',
 'FACE MASK  3 PLY  GREEN  RS(5)',
 "DISPRIN 300MG TAB (600'S)",
 "CALPOL TAB (200'S)",
 'PANADOL EXTRA TAB',
 'METHYCOBAL TAB',
 'NUBEROL FORTE TAB']

In [8]:
models = {}
predictions = {}
rmse_scores = {}
rmsle_scores = {}

In [9]:
for product in top_10_products:
    product_data = df[df['itemname'] == product]

    try:
        model = Prophet()
        model.fit(product_data)
    
        future = model.make_future_dataframe(periods=len(product_data))
    
        forecast = model.predict(future)
    
        models[product] = model
        predictions[product] = forecast
    
        actual_values = product_data['y'].values
        predicted_values = forecast.tail(len(product_data))['yhat'].values
        
        predicted_values = np.maximum(predicted_values, 0)
    
        rmse = np.sqrt(mean_squared_error(actual_values, predicted_values))
        rmsle = np.sqrt(mean_squared_log_error(actual_values, predicted_values))
        
        rmse_scores[product] = rmse
        rmsle_scores[product] = rmsle
    
        print(f"\nRMSE for {product}: {rmse}")
        print(f"RMSLE for {product}: {rmsle}")

    except Exception as e:
        print(f"\nError processing {product}: {e}\n")


02:16:09 - cmdstanpy - INFO - Chain [1] start processing
02:16:09 - cmdstanpy - INFO - Chain [1] done processing
02:16:09 - cmdstanpy - INFO - Chain [1] start processing
02:16:09 - cmdstanpy - INFO - Chain [1] done processing
02:16:10 - cmdstanpy - INFO - Chain [1] start processing
02:16:10 - cmdstanpy - INFO - Chain [1] done processing



RMSE for PANADOL TAB: 451.3002894214666
RMSLE for PANADOL TAB: 1.449161742068069

RMSE for LOPRIN 75MG TAB: 52.66517706332909
RMSLE for LOPRIN 75MG TAB: 0.8198845680482802


02:16:10 - cmdstanpy - INFO - Chain [1] start processing
02:16:10 - cmdstanpy - INFO - Chain [1] done processing
02:16:10 - cmdstanpy - INFO - Chain [1] start processing
02:16:10 - cmdstanpy - INFO - Chain [1] done processing



RMSE for SURBEX Z TAB(30'S): 51.675049026206686
RMSLE for SURBEX Z TAB(30'S): 0.8403581403162298

RMSE for GLUCOPHAGE 500MG TAB: 51.15314793486179
RMSLE for GLUCOPHAGE 500MG TAB: 0.8840147737925206


02:16:10 - cmdstanpy - INFO - Chain [1] start processing
02:16:10 - cmdstanpy - INFO - Chain [1] done processing
02:16:10 - cmdstanpy - INFO - Chain [1] start processing
02:16:10 - cmdstanpy - INFO - Chain [1] done processing



RMSE for FACE MASK  3 PLY  GREEN  RS(5): 93.51550739858368
RMSLE for FACE MASK  3 PLY  GREEN  RS(5): 0.9510361935653933

RMSE for DISPRIN 300MG TAB (600'S): 69.37346495085586
RMSLE for DISPRIN 300MG TAB (600'S): 2.656094501643137


02:16:10 - cmdstanpy - INFO - Chain [1] start processing
02:16:10 - cmdstanpy - INFO - Chain [1] done processing
02:16:10 - cmdstanpy - INFO - Chain [1] start processing
02:16:10 - cmdstanpy - INFO - Chain [1] done processing



RMSE for CALPOL TAB (200'S): 83.17048367876455
RMSLE for CALPOL TAB (200'S): 3.7088403000747254

RMSE for PANADOL EXTRA TAB: 57.13100694441406
RMSLE for PANADOL EXTRA TAB: 1.1875023458530114


02:16:10 - cmdstanpy - INFO - Chain [1] start processing
02:16:10 - cmdstanpy - INFO - Chain [1] done processing



RMSE for METHYCOBAL TAB: 34.53889343574355
RMSLE for METHYCOBAL TAB: 0.8022049117750772

RMSE for NUBEROL FORTE TAB: 25.039545651691686
RMSLE for NUBEROL FORTE TAB: 0.5509621497620908


In [10]:
average_rmse = np.mean(list(rmse_scores.values()))
average_rmsle = np.mean(list(rmsle_scores.values()))

In [11]:
print(f"Average RMSE for the top 10 products: {average_rmse}")
print(f"Average RMSLE for the top 10 products: {average_rmsle}")

Average RMSE for the top 10 products: 96.95625655059175
Average RMSLE for the top 10 products: 1.3850059626898534
