# Import Library

In [None]:
# Mount the user's Google Drive at /content/drive so files there are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
pip install pmdarima

Collecting pmdarima
  Downloading pmdarima-2.0.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl.metadata (7.8 kB)
Collecting Cython!=0.29.18,!=0.29.31,>=0.29 (from pmdarima)
  Downloading Cython-3.0.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.2 kB)
Collecting statsmodels>=0.13.2 (from pmdarima)
  Downloading statsmodels-0.14.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.2 kB)
Collecting patsy>=0.5.6 (from statsmodels>=0.13.2->pmdarima)
  Downloading patsy-1.0.1-py2.py3-none-any.whl.metadata (3.3 kB)
Downloading pmdarima-2.0.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl (2.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m27.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading Cython-3.0.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.6/3.6 

In [None]:
import pandas as pd
import matplotlib.pyplot as mlt
import datetime
import itertools
import warnings
import statsmodels.api as sm
from google.colab import files
import numpy as np

# Import Data

In [None]:
# Authenticate the current Colab user so Google APIs can be called on their behalf.
from google.colab import auth
auth.authenticate_user()

# Build a gspread client from the default credentials obtained after authentication.
import gspread
from google.auth import default
creds, _ = default()

gc = gspread.authorize(creds)

# Read every cell of the "Revenue_2024" tab of the 'Revenue_2024' spreadsheet.
# NOTE(review): the returned rows presumably include the sheet's header row — confirm before casting types.
worksheet = gc.open('Revenue_2024').worksheet("Revenue_2024")
dataset = worksheet.get_all_values()
# dataset # all dataset
# dataset = dataset[1:145]
# Number of rows fetched (shown as the cell output).
len(dataset)

12

In [None]:
# dataset

In [None]:
# Build the working frame from the raw sheet rows: a date label and a revenue figure per row.
# NOTE(review): len(dataset) is 12 while the saved describe() output reports 11 records, so the
# first row is presumably a header — if so it must be dropped before the casts below. TODO confirm.
data = pd.DataFrame(dataset, columns=['Date', 'Price'])

# A single to_datetime call is enough; the extra astype('datetime64[ns]') added nothing.
data['Date'] = pd.to_datetime(data['Date'])

# The saved describe() output shows object (string) columns, so a blank cell is '' rather than NaN.
# Coerce to numbers first so blanks become NaN, then fill and cast; fillna(0) on the raw
# strings had no effect and a blank would have made astype('int64') fail.
data['Price'] = pd.to_numeric(data['Price'], errors='coerce').fillna(0).astype('int64')
# data = data.loc[(data['Date'] >= '2010-1-1') &
#                 (data['Date'] < '2021-12-1')]
# data = data.reset_index()
# data = data.drop(columns=['index'])

# Index by date so the later label-based slices and the time-series model can use it.
data = data.set_index('Date')
# data


#converting to image
# dataframe_to_image.convert(data.head(),visualisation_library='matplotlib')

In [None]:
# Summary statistics of the frame; Date was moved to the index above, so only Price remains as a column.
data.describe()

Unnamed: 0,Date,Price
count,11,11
unique,11,11
top,Jan-24,35461043644
freq,1,1


In [None]:
# Price column as a standalone Series for plotting.
# data['Price'] is already a Series, so pd.Series(...) only re-wraps it.
dataSeries = pd.Series(data['Price'])
dataSeries

# Exploration and Model Fitting

In [None]:
# mlt.figure(figsize=(25, 5))
# mlt.plot(data)
# mlt.grid()

# # axis labels and title
# mlt.xlabel("Month", fontsize=10)
# mlt.ylabel("Price", fontsize=10)
# mlt.title("Price of Rice", fontsize=18)

# #add legend
# mlt.legend(["Price"], loc=2, fontsize=8)

# Line plot of the price series.
# NOTE(review): dataSeries is a Series, so `y='Price'` and `subplots=True` look unnecessary here — confirm and drop.
dataSeries.plot(y='Price', subplots=True, grid=False, figsize=(15, 8), fontsize=12)
mlt.xlabel('timestamp', fontsize=12)
mlt.ylabel('Price', fontsize=12)

# Write the figure to data.png in the working directory, then render it.
mlt.savefig('data.png')
# files.download("data.png")

mlt.show()

#save image

In [None]:
from statsmodels.tsa.stattools import adfuller, kpss

# ADF test on the price series; the lag length is chosen by AIC.
# result[0] is the statistic, result[1] the p-value, result[4] the critical-value dict.
result = adfuller(data['Price'], autolag='AIC')
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Critical Values:')
for key, value in result[4].items():
    print('\t%s: %.3f' % (key, value))

# KPSS test with the 'ct' (constant + trend) regression.
stats, p, lags, critical_values = kpss(data['Price'], 'ct')
print(f'KPSS Test Statistic: {stats}')
print(f'p-value: {p}')
# Print the KPSS critical values here; this loop used to iterate the ADF
# dict (result[4]) and so mislabelled ADF thresholds as KPSS ones.
print('Critical Values:')
for key, value in critical_values.items():
    print('\t%s: %.3f' % (key, value))

In [None]:
# Chronological hold-out split.
# ISO dates remove the day/month ambiguity of '01-12-2020', and a single cut-off keeps
# train and test contiguous. The later positional predict (start=len(train)) relies on
# there being no gap between the two sets.
TRAIN_START = '2010-01-01'
TEST_START = '2021-01-01'

train = data.loc[(data.index >= TRAIN_START) & (data.index < TEST_START)].copy()
test = data.loc[data.index >= TEST_START].copy()

print('Training data shape: ', train.shape)
print('Test data shape: ', test.shape)

In [None]:
# Candidate values for each of the p, d and q orders: 0 or 1 (the range's upper bound is exclusive).
p = d = q = range(0, 2)

# Every (p, d, q) triplet for the non-seasonal part of the model.
pdq = [triplet for triplet in itertools.product(p, d, q)]

# The same triplets for the seasonal part, each extended with a period of 12.
seasonal_pdq = [(*triplet, 12) for triplet in itertools.product(p, d, q)]

In [None]:
warnings.filterwarnings("ignore") # specify to ignore warning messages

# Fit one SARIMAX model per (order, seasonal_order) pair and print its AIC.
# Finite scores are collected so the lowest-AIC combination can be reported at the end.
grid_scores = []

for param in pdq:
    for param_seasonal in seasonal_pdq:
        try:
            mod = sm.tsa.statespace.SARIMAX(data['Price'],
                                            order=param,
                                            seasonal_order=param_seasonal,
                                            enforce_stationarity=False,
                                            enforce_invertibility=False)

            results = mod.fit()
        except Exception as exc:
            # Best-effort sweep: a combination that cannot be fitted is reported and skipped.
            # Catching Exception (not a bare except) lets KeyboardInterrupt stop the loop.
            print('ARIMA{}x{}12 - skipped ({}: {})'.format(
                param, param_seasonal, type(exc).__name__, exc))
            continue

        print('ARIMA{}x{}12 - AIC:{}'.format(param, param_seasonal, results.aic))
        if np.isfinite(results.aic):
            grid_scores.append((results.aic, param, param_seasonal))

if grid_scores:
    best_aic, best_order, best_seasonal_order = min(grid_scores, key=lambda score: score[0])
    print('Lowest AIC: ARIMA{}x{}12 - AIC:{}'.format(best_order, best_seasonal_order, best_aic))

In [None]:
# Fit SARIMAX with order (1, 1, 1) and seasonal order (1, 1, 1, 12) on the full Price series,
# with the stationarity and invertibility constraints switched off.
mod = sm.tsa.statespace.SARIMAX(
    data['Price'],
    order=(1, 1, 1),
    seasonal_order=(1, 1, 1, 12),
    enforce_stationarity=False,
    enforce_invertibility=False,
)
results = mod.fit()

# Print only the second table of the fit summary.
summary_tables = results.summary().tables
print(summary_tables[1])

In [None]:
# Draw the fitted model's diagnostic plots on a 15x12 figure.
results.plot_diagnostics(figsize=(15, 12))

# Save the figure to the working directory (note the space in the file name), then render it.
mlt.savefig('plot diagnostic.png')
# files.download("foo.png")

mlt.show()


In [None]:
# In-sample prediction from 2021-01-01 onward with dynamic=False (plotted below as the
# "One-step ahead Forecast"), together with its confidence-interval frame.
pred = results.get_prediction(start=pd.to_datetime('2021-01-01'), dynamic=False)
pred_ci = pred.conf_int()
# print(pred)
# print(pred_ci)

In [None]:
# Observed series from 2010 onward, with the one-step predictions drawn on the same axes.
ax = data['2010':].plot(label='observed')
pred.predicted_mean.plot(ax=ax, label='One-step ahead Forecast', alpha=.7)

# Shade the band between the first and second columns of the interval frame
# (presumably the lower and upper bounds — confirm column order).
ax.fill_between(pred_ci.index,
                pred_ci.iloc[:, 0],
                pred_ci.iloc[:, 1], color='k', alpha=.2)

ax.set_xlabel('Date')
ax.set_ylabel('Price')
mlt.legend()

# Save the figure to the working directory, then render it.
mlt.savefig('validate process.png')
# files.download("foo.png")

mlt.show()

# Measurement Evaluation

In [None]:
import random
import math as m

def mape(a, f):
  """Mean absolute percentage error, returned as a fraction (not multiplied by 100).

  Args:
    a: sequence of actual values; must be non-empty.
    f: sequence of forecast values, same length as `a`.

  Returns:
    The mean of |(a[i] - f[i]) / a[i]| over all positions.

  Raises:
    ValueError: if the inputs are empty or differ in length.
    ZeroDivisionError: if an actual value is a plain-Python zero.
  """
  if len(a) != len(f):
    raise ValueError("actual and forecast must have the same length")
  if len(a) == 0:
    raise ValueError("actual and forecast must not be empty")

  # Divide the whole error by the actual value. The previous form `a - f/a`
  # divided only the forecast, because `/` binds tighter than `-`.
  return sum(abs((actual - forecast) / actual) for actual, forecast in zip(a, f)) / len(a)

def wape(a, f):
  """Weighted absolute percentage error: total absolute error over |total actual|.

  Args:
    a: sequence of actual values; must be non-empty.
    f: sequence of forecast values, same length as `a`.

  Returns:
    sum(|a[i] - f[i]|) / |sum(a)|, as a fraction.

  Raises:
    ValueError: if the inputs are empty or differ in length.
    ZeroDivisionError: if the actual values sum to a plain-Python zero.
  """
  # The old guard (`len(...) < 0`) could never be true, so it validated nothing.
  if len(a) != len(f):
    raise ValueError("actual and forecast must have the same length")
  if len(a) == 0:
    raise ValueError("actual and forecast must not be empty")

  total_error = sum(abs(actual - forecast) for actual, forecast in zip(a, f))
  return total_error / abs(sum(a))

def wmape(a, f, d, weights=None):
  """Weighted mean absolute percentage error.

  Args:
    a: sequence of actual values; must be non-empty.
    f: sequence of forecast values, same length as `a`.
    d: selects how weights are drawn when `weights` is not given:
       1 -> random.uniform(1, 3), 2 -> random.gauss(1, 3), anything else -> random.randint(1, 3).
    weights: optional explicit weights, one per observation. When given, `d` is ignored
       and the result is deterministic.

  Returns:
    sum(w[i] * |a[i] - f[i]|) / sum(w[i] * a[i]), as a fraction.

  Raises:
    ValueError: if the inputs are empty or any of the lengths differ.
  """
  # The old guard (`len(...) < 0`) could never be true, so it validated nothing.
  if len(a) != len(f):
    raise ValueError("actual and forecast must have the same length")
  if len(a) == 0:
    raise ValueError("actual and forecast must not be empty")

  if weights is None:
    # Randomly drawn weights: seed `random` beforehand for a reproducible score.
    # NOTE(review): gauss(1, 3) can produce negative weights — confirm that is intended.
    if d == 1: weights = [random.uniform(1, 3) for _ in range(len(a))]
    elif d == 2: weights = [random.gauss(1, 3) for _ in range(len(a))]
    else: weights = [random.randint(1, 3) for _ in range(len(a))]
  elif len(weights) != len(a):
    raise ValueError("weights must have the same length as actual")

  weighted_error = sum(w * abs(actual - forecast) for w, actual, forecast in zip(weights, a, f))
  weighted_actual = sum(w * actual for w, actual in zip(weights, a))

  return weighted_error / weighted_actual

def rmse(a,f):
  """Root mean squared error between actual and forecast values.

  Args:
    a: sequence of actual values; must be non-empty.
    f: sequence of forecast values, same length as `a`.

  Returns:
    sqrt(sum((a[i] - f[i])**2) / len(a)) as a float.

  Raises:
    ValueError: if the inputs are empty or differ in length.
  """
  # The old guard (`len(...) < 0`) could never be true, so it validated nothing.
  if len(a) != len(f):
    raise ValueError("actual and forecast must have the same length")
  if len(a) == 0:
    raise ValueError("actual and forecast must not be empty")

  squared_error = sum(abs(actual - forecast) ** 2 for actual, forecast in zip(a, f))
  return m.sqrt(squared_error / len(a))

def wrmse(a, f, d, weights=None):
  """Weighted root mean squared error.

  Args:
    a: sequence of actual values; must be non-empty.
    f: sequence of forecast values, same length as `a`.
    d: selects the weights when `weights` is not given:
       1 -> random.uniform(1, 3), 2 -> random.gauss(1, 3),
       anything else -> log(a[i] + 1) + 1 (deterministic, derived from the actuals).
    weights: optional explicit weights, one per observation. When given, `d` is ignored.

  Returns:
    sqrt(sum(w[i] * (a[i] - f[i])**2) / len(a)) as a float.

  Raises:
    ValueError: if the inputs are empty or any of the lengths differ; math.sqrt/math.log
      also raise it for a negative weighted sum or a non-positive log argument.
  """
  # The old guard (`len(...) < 0`) could never be true, so it validated nothing.
  if len(a) != len(f):
    raise ValueError("actual and forecast must have the same length")
  if len(a) == 0:
    raise ValueError("actual and forecast must not be empty")

  if weights is None:
    # NOTE(review): gauss(1, 3) can produce negative weights — confirm that is intended.
    if d == 1: weights = [random.uniform(1, 3) for _ in range(len(a))]
    elif d == 2: weights = [random.gauss(1, 3) for _ in range(len(a))]
    else: weights = [m.log(actual + 1) + 1 for actual in a]
  elif len(weights) != len(a):
    raise ValueError("weights must have the same length as actual")

  weighted_sq_error = sum(w * (abs(actual - forecast) ** 2)
                          for w, actual, forecast in zip(weights, a, f))

  # NOTE(review): normalised by the observation count, not by the sum of weights —
  # kept as in the original; confirm which definition is wanted.
  return m.sqrt(weighted_sq_error / len(a))

In [None]:
# Pair every prediction with the observation for the same date. `pred` starts at
# 2021-01-01, so slicing the truth from 2010 lined values up by position, not by date,
# and left the two arrays with different lengths.
y_forecasted = pred.predicted_mean
# Select with the frame (not the Price column) so .values stays 2-D, the shape the
# evaluation table below has been built from so far.
y_truth = data.loc[y_forecasted.index]
# Both sides skip their first element, as before.
y_pred = y_forecasted[1:].values
y_truth2 = y_truth[1:].values

In [None]:
forecast = y_pred
actual = y_truth2

# wmape(..., 2) draws random weights; fix the seed so the table is the same on every run.
random.seed(42)

rMape = mape(actual, forecast)
rWape = wape(actual, forecast)
rWmape = wmape(actual, forecast, 2)
rRmse = rmse(actual, forecast)
rWrmse = wrmse(actual, forecast, 0)

# np.ravel turns each score (a scalar or a 1-element array) into a length-1 column.
# A dict made only of scalars would make pd.DataFrame raise for lack of an index.
evalTable = pd.DataFrame({"mape": np.ravel(rMape),
                          "wape": np.ravel(rWape),
                          "wmape": np.ravel(rWmape),
                          "rmse": np.ravel(rRmse),
                          "wrmse": np.ravel(rWrmse)})

display(evalTable)

In [None]:
# Second evaluation window: truth is taken up to 2016-1-1 while predictions still come from `pred`.
# NOTE(review): `pred` was requested from 2021-01-01, so these two slices do not appear to
# cover the same dates — confirm the intended window before trusting the metrics below.
y_forecasted = pred.predicted_mean
y_truth = data[: '2016-1-1']
# Both sides skip their first element.
y_pred = y_forecasted[1:].values
y_truth2 = y_truth[1:].values

In [None]:
forecast = y_pred
actual = y_truth2

# wmape(..., 2) draws random weights; fix the seed so the table is the same on every run.
random.seed(42)

rMape = mape(actual, forecast)
rWape = wape(actual, forecast)
rWmape = wmape(actual, forecast, 2)
rRmse = rmse(actual, forecast)
rWrmse = wrmse(actual, forecast, 0)

# np.ravel turns each score (a scalar or a 1-element array) into a length-1 column.
# A dict made only of scalars would make pd.DataFrame raise for lack of an index.
evalTable = pd.DataFrame({"mape": np.ravel(rMape),
                          "wape": np.ravel(rWape),
                          "wmape": np.ravel(rWmape),
                          "rmse": np.ravel(rRmse),
                          "wrmse": np.ravel(rWrmse)})

display(evalTable)

# Predict

In [None]:
# Prediction from 2021-01-01 with dynamic=True (plotted below as the "Dynamic Forecast"),
# together with its confidence-interval frame.
# NOTE(review): `full_results=True` is passed through as an extra keyword — confirm the
# installed statsmodels version still accepts it.
pred_dynamic = results.get_prediction(start=pd.to_datetime('2021-01-01'), dynamic=True, full_results=True)
pred_dynamic_ci = pred_dynamic.conf_int()
# print(pred_dynamic_ci)

In [None]:
# Observed series from 2010 onward with the dynamic forecast drawn on the same axes.
ax = data['2010':].plot(label='observed', figsize=(20, 15))
pred_dynamic.predicted_mean.plot(label='Dynamic Forecast', ax=ax)

# Shade the band between the first and second columns of the interval frame
# (presumably the lower and upper bounds — confirm column order).
ax.fill_between(pred_dynamic_ci.index,
                pred_dynamic_ci.iloc[:, 0],
                pred_dynamic_ci.iloc[:, 1], color='k', alpha=.25)

# Highlight the span from the forecast start date to the last observation, across the full y-range.
ax.fill_betweenx(ax.get_ylim(), pd.to_datetime('2021-01-01'), data.index[-1],
                 alpha=.1, zorder=-1)

ax.set_xlabel('Date')
ax.set_ylabel('Price')

mlt.legend()
# Save the figure to the working directory, then render it.
mlt.savefig('range price.png')
# files.download("foo.png")
mlt.show()

In [None]:
# Same figure as the previous cell, showing the observed series from 2016 onward; not saved to disk.
ax = data['2016':].plot(label='observed', figsize=(20, 15))
pred_dynamic.predicted_mean.plot(label='Dynamic Forecast', ax=ax)

# Shade the band between the first and second columns of the interval frame
# (presumably the lower and upper bounds — confirm column order).
ax.fill_between(pred_dynamic_ci.index,
                pred_dynamic_ci.iloc[:, 0],
                pred_dynamic_ci.iloc[:, 1], color='k', alpha=.25)

# NOTE(review): the highlight starts at 2020-01-01 here, but pred_dynamic was requested
# from 2021-01-01 and the previous cell shades from 2021-01-01 — confirm which is intended.
ax.fill_betweenx(ax.get_ylim(), pd.to_datetime('2020-01-01'), data.index[-1],
                 alpha=.1, zorder=-1)

ax.set_xlabel('Date')
ax.set_ylabel('Price')

mlt.legend()
# mlt.savefig('foo.png')
# files.download("foo.png")
mlt.show()

In [None]:
# Extract the predicted and true values of our time series, paired by date:
# the truth is selected on the forecast's own index instead of starting at 2010.
y_forecasted = pred_dynamic.predicted_mean
y_truth = data.loc[y_forecasted.index, 'Price']
# Positions 1..9 of the forecast window, as before.
y_pred = y_forecasted[1:10].values
y_truth2 = y_truth[1:10].values

# Average the squared errors over the whole window. The previous loop reassigned `mse`
# on every iteration, so it reported only the last point's squared error.
mse = ((y_pred - y_truth2) ** 2).mean()

print('The Mean Squared Error of our forecasts is {}'.format(mse))

In [None]:
# Get forecast 50 steps ahead in future
pred_uc = results.get_forecast(steps = 50)

# Get confidence intervals of forecasts
# (this rebinds pred_ci, which previously held the one-step prediction intervals)
pred_ci = pred_uc.conf_int()

In [None]:
# Observed series from 2010-01-01 onward with the out-of-sample forecast on the same axes.
ax = data['2010-01-01':].plot(label='observed', figsize=(20, 15))
pred_uc.predicted_mean.plot(ax=ax, label='Forecast')
# Shade the band between the first and second columns of the interval frame
# (presumably the lower and upper bounds — confirm column order).
ax.fill_between(pred_ci.index,
                pred_ci.iloc[:, 0],
                pred_ci.iloc[:, 1], color='k', alpha=.25)
ax.set_xlabel('Date')
ax.set_ylabel('Price')

mlt.legend()
# Save the figure to the working directory, then render it.
mlt.savefig('predict.png')

mlt.show()

In [None]:
# Positional bounds of the test window; this assumes train and test are
# consecutive slices starting at the first row of `data`.
start = len(train)
end = len(train) + len(test) - 1

# Predictions over the test window, to compare against the test set
predictions = results.predict(start, end,
                             typ = 'levels').rename("Predictions")

# Plot predictions and actual values on the same axes. `test` is a DataFrame, and a bare
# test.plot() is not guaranteed to reuse the axes the predictions were drawn on.
ax = predictions.plot(legend = True)
test['Price'].plot(ax = ax, legend = True)

In [None]:
# Refit the (1, 1, 1)x(1, 1, 1, 12) specification on the full Price series,
# this time with SARIMAX's default constraint settings.
model = sm.tsa.statespace.SARIMAX(
    data['Price'],
    order=(1, 1, 1),
    seasonal_order=(1, 1, 1, 12),
)
result = model.fit()

# Forecast for the next 3 years: 3 * 12 steps starting right after the last observation
first_step = len(data)
last_step = (len(data) - 1) + 3 * 12
forecast = result.predict(start=first_step, end=last_step, typ='levels').rename('Forecast')

# Plot the observed series, then the forecast values
data['Price'].plot(figsize=(12, 5), legend=True)
forecast.plot(legend=True)

In [None]:
# forecast = [train, test]
# result = pd.concat(forecast)
# print(result)


# ax = result.plot(label='observed', figsize=(25, 10))
# pred_uc.predicted_mean.plot(ax=ax, label='Forecast')
# ax.fill_between(pred_ci.index,
#                 pred_ci.iloc[:, 0],
#                 pred_ci.iloc[:, 1], color='k', alpha=.25)
# ax.set_xlabel('Date')
# ax.set_ylabel('Price')

# mlt.legend()
# print(pred_uc.predicted_mean)

# mlt.show()

In [None]:
import math as m

# Pair the one-step predictions with the observations for the same dates:
# the truth is selected on the prediction's own index instead of starting at 2010.
y_forecasted = pred.predicted_mean
y_truth = data.loc[y_forecasted.index, 'Price']
# Both sides skip their first element, as before.
y_pred = y_forecasted[1:].values
y_truth2 = y_truth[1:].values

# Average the squared errors over the whole window. The previous loop reassigned `mse`
# on every iteration, so it reported only the last point's squared error.
mse = ((y_pred - y_truth2) ** 2).mean()

print('The Mean Squared Error of our forecasts is {}'.format(mse))

In [None]:
import random as rd
import pandas as pd

# Toy data for trying out the error metrics: nine random draws per column.
# Forecasts are scaled to [0, 100), actuals to [0, 300), weights to [0, 10).
SAMPLE_COUNT = 9

f = [round(rd.random() * 100, 3) for _ in range(SAMPLE_COUNT)]
a = [round(rd.random() * 300, 3) for _ in range(SAMPLE_COUNT)]
w = [round(rd.random() * 10, 2) for _ in range(SAMPLE_COUNT)]

table_columns = {"Actual": a, "Forecast": f, "Weight": w}
table = pd.DataFrame(table_columns)

display(table)

In [None]:
def wape(f, a):
  """Absolute error of forecast `f` against actual `a`, relative to `a`.

  Note the argument order: forecast first, then actual.
  """
  absolute_error = abs(a - f)
  return absolute_error / a

In [None]:
# Compare like with like: total actuals against total forecasts.
# Previously `at` was the mean of the actuals while `ft` was the sum of the forecasts.
at = sum(a)
ft = sum(f)
wt = sum(w)

# wape here is the scalar helper that takes (forecast, actual), in that order.
result = wape(ft, at)
print(result)

# print(result)

In [None]:
# Worked example of a weighted error ratio: weight * (actual - forecast) over weight * actual.
# The denominator needs its own parentheses: `x / 10 * 492` evaluates as `(x / 10) * 492`.
result = (10 * (492 - 451)) / (10 * 492)
print(result)

In [None]:
import pandas as pd

df = pd.DataFrame({'Actual': a,
                   'Forecasted': f})

# WAPE over the frame: total absolute error divided by total actuals.
# The score gets its own name: assigning it to `wape` replaced the wape() function
# with a number, so any later wape(...) call would fail.
wape_score = (df.Actual - df.Forecasted).abs().sum() / df.Actual.sum()
print(wape_score)

In [None]:
import random

def wmape(a, f, d, weights=None):
  """Weighted mean absolute percentage error.

  Args:
    a: sequence of actual values; must be non-empty.
    f: sequence of forecast values, same length as `a`.
    d: selects how weights are drawn when `weights` is not given:
       1 -> random.uniform(1, 3), 2 -> random.gauss(1, 3), anything else -> random.randint(1, 3).
    weights: optional explicit weights, one per observation. When given, `d` is ignored
       and the result is deterministic.

  Returns:
    sum(w[i] * |a[i] - f[i]|) / sum(w[i] * a[i]), as a fraction.

  Raises:
    ValueError: if the inputs are empty or any of the lengths differ.
  """
  # The old guard (`len(...) < 0`) could never be true, so it validated nothing.
  if len(a) != len(f):
    raise ValueError("actual and forecast must have the same length")
  if len(a) == 0:
    raise ValueError("actual and forecast must not be empty")

  if weights is None:
    # Randomly drawn weights: seed `random` beforehand for a reproducible score.
    # NOTE(review): gauss(1, 3) can produce negative weights — confirm that is intended.
    if d == 1: weights = [random.uniform(1, 3) for _ in range(len(a))]
    elif d == 2: weights = [random.gauss(1, 3) for _ in range(len(a))]
    else: weights = [random.randint(1, 3) for _ in range(len(a))]
  elif len(weights) != len(a):
    raise ValueError("weights must have the same length as actual")

  weighted_error = sum(w * abs(actual - forecast) for w, actual, forecast in zip(weights, a, f))
  weighted_actual = sum(w * actual for w, actual in zip(weights, a))

  return weighted_error / weighted_actual