In [1]:
import pandas as pd
from prophet import Prophet
from sklearn.metrics import mean_squared_log_error
import numpy as np

In [2]:
df = pd.read_csv('../../Data/Pharmacy/D4.csv', parse_dates=['date'], dayfirst=True)

In [3]:
df.rename(columns={'date': 'ds', 'looseqty': 'y'}, inplace=True)

In [4]:
total_sales_by_product = df.groupby('itemname')['y'].sum().reset_index()

In [5]:
top_10_products = total_sales_by_product.nlargest(10, 'y')['itemname'].tolist()

In [6]:
top_10_products

['PANADOL TAB',
 'LOPRIN 75MG TAB',
 "SURBEX Z TAB(30'S)",
 'GLUCOPHAGE 500MG TAB',
 'FACE MASK  3 PLY  GREEN  RS(5)',
 "DISPRIN 300MG TAB (600'S)",
 "CALPOL TAB (200'S)",
 'PANADOL EXTRA TAB',
 'METHYCOBAL TAB',
 'NUBEROL FORTE TAB']

In [7]:
models = {}
predictions = {}
rmsle_scores = {}

In [8]:
for product in top_10_products:
    product_data = df[df['itemname'] == product]

    try:
        model = Prophet()
        model.fit(product_data)
    
        future = model.make_future_dataframe(periods=len(product_data))
    
        forecast = model.predict(future)
    
        models[product] = model
        predictions[product] = forecast
    
        actual_values = product_data['y'].values
        predicted_values = forecast.tail(len(product_data))['yhat'].values
    
        rmsle = np.sqrt(mean_squared_log_error(actual_values, predicted_values))
        rmsle_scores[product] = rmsle
    
        print(f"RMSLE for {product}: {rmsle}")

    except Exception as e:
        print(f"\nError processing {product}: {e}\n")


01:49:47 - cmdstanpy - INFO - Chain [1] start processing
01:49:47 - cmdstanpy - INFO - Chain [1] done processing
01:49:47 - cmdstanpy - INFO - Chain [1] start processing
01:49:47 - cmdstanpy - INFO - Chain [1] done processing
01:49:48 - cmdstanpy - INFO - Chain [1] start processing
01:49:48 - cmdstanpy - INFO - Chain [1] done processing


RMSLE for PANADOL TAB: 1.449161742068069
RMSLE for LOPRIN 75MG TAB: 0.8198845680482802


01:49:48 - cmdstanpy - INFO - Chain [1] start processing
01:49:48 - cmdstanpy - INFO - Chain [1] done processing
01:49:48 - cmdstanpy - INFO - Chain [1] start processing
01:49:48 - cmdstanpy - INFO - Chain [1] done processing


RMSLE for SURBEX Z TAB(30'S): 0.8403581403162298
RMSLE for GLUCOPHAGE 500MG TAB: 0.8840147737925206


01:49:48 - cmdstanpy - INFO - Chain [1] start processing
01:49:48 - cmdstanpy - INFO - Chain [1] done processing
01:49:48 - cmdstanpy - INFO - Chain [1] start processing
01:49:48 - cmdstanpy - INFO - Chain [1] done processing


RMSLE for FACE MASK  3 PLY  GREEN  RS(5): 0.9510361935653933

Error processing DISPRIN 300MG TAB (600'S): Mean Squared Logarithmic Error cannot be used when targets contain negative values.


01:49:48 - cmdstanpy - INFO - Chain [1] start processing
01:49:48 - cmdstanpy - INFO - Chain [1] done processing
01:49:48 - cmdstanpy - INFO - Chain [1] start processing
01:49:48 - cmdstanpy - INFO - Chain [1] done processing



Error processing CALPOL TAB (200'S): Mean Squared Logarithmic Error cannot be used when targets contain negative values.

RMSLE for PANADOL EXTRA TAB: 1.1875023458530114


01:49:48 - cmdstanpy - INFO - Chain [1] start processing
01:49:48 - cmdstanpy - INFO - Chain [1] done processing


RMSLE for METHYCOBAL TAB: 0.8022049117750772
RMSLE for NUBEROL FORTE TAB: 0.5509621497620908


In [9]:
average_rmsle = np.mean(list(rmsle_scores.values()))
print(f"Average RMSLE for the top 10 products: {average_rmsle}")

Average RMSLE for the top 10 products: 0.935640603147584
