### Importing the basic libraries used to perform the analysis in Python

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

### Reading the data

In [None]:
# Load the raw gold-price CSV and, in the same step, convert the 'Date'
# column from text to pandas datetimes so the data can later be indexed
# and sliced by date.
gold_price_full = pd.read_csv('gold_price_data.csv').assign(
    Date=lambda frame: pd.to_datetime(frame['Date'])
)

### Let's look at the first few rows of the data

In [None]:
# Preview the first rows to check that the file loaded and 'Date' was parsed.
gold_price_full.head()

## `Set the DATE column as the index.`


In [None]:
# Promote 'Date' from an ordinary column to the row index; set_index returns
# a new frame, so the name is rebound to the result.
gold_price_full = gold_price_full.set_index(keys='Date')

In [None]:
# Confirm that 'Date' now appears as the index rather than as a column.
gold_price_full.head()

### We will use the data up to the end of 2017 to train the forecasting model, and then use the model to forecast the gold price from 2018 onwards.

In [None]:
# Split by date label; label-based slices include both end points.
# NOTE(review): the two ranges overlap (2017-12-01 through 2018-01-01). This
# looks deliberate -- a later cell drops the last training row, and
# differencing the test set discards its first row -- but confirm it against
# the sampling frequency of the data.
train = gold_price_full.loc[:'2018-01-01']
test = gold_price_full.loc['2017-12-01':]


In [None]:
# Preview the first rows of the training split.
train.head()

## Making the time plot of the train dataset to visualise it


In [None]:
# Time plot of the training data on a 15x10 inch figure.
plt.figure(figsize=[15,10])
plt.xlabel('Year')
# The plotted series is the gold price, so label the axis accordingly
# (the previous 'Sales' label did not match this dataset).
plt.ylabel('Gold price')
plt.plot(train)

## Question - 1: Write your observations about the gold price series in detail.

In [None]:
# Run this cell and don't make any changes
# Work on a copy so that the moving-average column added in the next cell
# does not end up in `train`.
for_sma = train.copy()

First, let's use a simple forecasting technique: implement the simple moving average forecasting model with a rolling window of 12 months.

In [None]:
# Simple moving average of 'Value' over 12 consecutive rows. Rows that do not
# yet have a full 12-row window behind them come out as NaN.
for_sma['SMA_12'] = for_sma['Value'].rolling(12).mean()
# Display only the most recent row, i.e. the latest moving-average value.
for_sma.tail(1)

## Question - 2: According to the results obtained in the previous question, what can you infer about the results obtained? Is the moving average model working fine? 

Now let's build auto-regressive models.

## Question-3: Before moving ahead with building an auto-regressive model, we need to remove the trend and seasonality from the given time series. Why do trend and seasonality adversely affect auto-regressive models, and why must they be removed before building an ARIMA model?

## Lets check the rolling statistics 

In [None]:
# Drop the final row of the training split (all columns are kept).
# NOTE: this is not idempotent -- every re-run of the cell removes one more row.
train = train.iloc[:-1]
from matplotlib.pylab import rcParams
def rolling_means(time_series, window=12):
    """Plot a series together with its rolling mean and rolling std.

    Roughly constant rolling statistics are a quick visual hint that a series
    is stationary; a drifting mean or a changing spread suggests it is not.

    Parameters
    ----------
    time_series : pandas.Series or pandas.DataFrame
        Data to plot. Anything exposing ``.rolling(...)`` and plottable by
        matplotlib works.
    window : int, default 12
        Number of consecutive observations in each rolling window. The
        default keeps the original behaviour of this helper.

    Returns
    -------
    None
        The function only draws on the current matplotlib figure.
    """
    rolmean = time_series.rolling(window=window).mean()
    rolstd = time_series.rolling(window=window).std()

    # Plot rolling statistics.
    # NOTE: this changes matplotlib's global default figure size, so it also
    # applies to later figures that do not set their own size.
    rcParams['figure.figsize'] = 10, 5

    plt.plot(time_series, color='blue', label='Original')
    plt.plot(rolmean, color='red', label='Rolling Mean')
    plt.plot(rolstd, color='black', label='Rolling Std')
    plt.legend(loc='best')
    plt.title('Rolling Mean & Standard Deviation')
    # Non-blocking show so execution continues after the figure is drawn.
    plt.show(block=False)

In [None]:
# Plot the training data with its rolling mean and rolling standard deviation.
rolling_means(train)

## Question-4: Explain the significance of the above step. What can we infer from the above results?

## Perform ADF test to check the stationarity of the data

In [None]:

def adf_test(time_series, column='Value'):
    """Run the Augmented Dickey-Fuller test and print a summary of the result.

    Parameters
    ----------
    time_series : pandas.DataFrame or pandas.Series
        Data to test. For a DataFrame the column named by ``column`` is
        tested; any other input (e.g. a Series) is passed to the test as is.
    column : str, default 'Value'
        Column to test when ``time_series`` is a DataFrame. The default keeps
        the original behaviour of this helper.

    Returns
    -------
    None
        The summary is printed: test statistic, p-value, number of lags used
        and number of observations (rounded to 4 decimals), followed by the
        critical values reported by the test.
    """
    from statsmodels.tsa.stattools import adfuller

    # Previously the 'Value' lookup was unconditional, which failed for a
    # plain Series; only index by column when there is a column to select.
    if isinstance(time_series, pd.DataFrame):
        values = time_series[column]
    else:
        values = time_series

    # autolag='AIC' lets adfuller choose the lag length using the AIC.
    dftest = adfuller(values, autolag='AIC')

    # The first four entries of the result are scalars; the fifth is a dict
    # of critical values keyed by significance level.
    dfoutput = round(
        pd.Series(
            dftest[0:4],
            index=['Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used'],
        ),
        4,
    )
    for key, value in dftest[4].items():
        dfoutput['Critical Value (%s)' % key] = value

    print('Results of Dickey Fuller Test:')
    print(dfoutput)

In [None]:
# Run the ADF test on the 'Value' column of the (untransformed) training data.
adf_test(train)

## Question-5: What is the null hypothesis of the ADF test? What can you infer from the results of this test?

# Log Transformation and Differencing

Here we will remove the trend and seasonality from our data.

In [None]:
# Natural log of the training 'Value' column; the result is a Series.
# Differences of logs approximate relative (percentage) changes.
log_trans = np.log(train['Value'])

In [None]:
# First difference of the log series (each value minus the previous one).
# The first row has no predecessor, so it is NaN and gets dropped; the result
# is wrapped in a one-column DataFrame.
diff_data = log_trans.diff().dropna().to_frame()

## Question-6: Previously we created a function for plotting the rolling mean and standard deviation. Use the same function to check the plot of `diff_data`

In [None]:
# Write your code here.

## Question-7: Previously we created a function for performing the ADF test (`adf_test`). Use the same function to check the ADF test results for `diff_data`

In [None]:
# Write your code here.

## Question-8: Is the data stationary now? Explain the results here

## Now, before performing time series forecasting, let's plot the ACF and PACF

In [None]:
from statsmodels.graphics.tsaplots import plot_acf

# Autocorrelation of the differenced series up to lag 15, drawn on the axes
# of a fresh 12x4 inch figure.
plot_acf(diff_data, lags=15, ax=plt.figure(figsize=(12, 4)).gca())
plt.show()

In [None]:
from statsmodels.graphics.tsaplots import plot_pacf

# Partial autocorrelation of the differenced series up to lag 15, drawn on
# the axes of a fresh 12x4 inch figure.
plot_pacf(diff_data, lags=15, ax=plt.figure(figsize=(12, 4)).gca())
plt.show()

## Question-9: What can you infer from the above plots?

### Performing log transformation and differencing on the test dataset

In [None]:
# Apply the same transformation to the test period: natural log of the whole
# frame, then the first difference.
log_test = np.log(test)
# diff() subtracts the previous row; the first row has no predecessor, so it
# is NaN and is removed by dropna().
diff_test = log_test.diff().dropna()

Now let's build an AR Model using the differenced data

In [None]:
# NOTE(review): `AR` has been deprecated in statsmodels in favour of
# `statsmodels.tsa.ar_model.AutoReg` and this import is expected to fail on
# recent releases -- confirm against the installed statsmodels version.
from statsmodels.tsa.ar_model import AR
# Fit an autoregressive model to the differenced log series.
model = AR(diff_data)
# No lag order is passed to fit(), so the library default applies.
model_fit = model.fit()
# k_ar is printed here as the lag order of the fitted model.
print('Lag: %s' % model_fit.k_ar)
print('Coefficients: %s' % model_fit.params)

## Make predictions (ONLY FOR THE NEXT 12 MONTHS )

## Using the model(trained in the previous step) to predict for next 12 months.

In [None]:
# Forecast one value per row of diff_test. Positions are 0-based, so
# len(diff_data) is the first step after the training sample.
predictions = model_fit.predict(start=len(diff_data), end=len(diff_data)+len(diff_test)-1, dynamic=False)
# NOTE: when run as a notebook cell only the cell's last expression is
# displayed, so this head() call produces no visible output.
predictions.head()

# Print each forecast next to the observed differenced value.
# NOTE(review): predictions[i] relies on integer keys being treated as
# positions, which depends on the index type of `predictions` -- verify.
# diff_test is still a DataFrame here, so diff_test.iloc[i] is a row; '%f'
# only works while that row holds a single value.
for i in range(len(predictions)):
    print('predicted=%f, expected=%f' % (predictions[i], diff_test.iloc[i]))
    

## Plotting the AR MODEL 

In [None]:
# Overlay the last 24 training observations, the AR forecasts and the
# observed test values on one 15x10 inch figure.
plt.figure(figsize=(15, 10))
plt.plot(diff_data.iloc[-24:])
plt.plot(predictions)
plt.plot(diff_test)
print('Plotting AR model')

In [None]:
# Reduce the differenced test frame to its 'Value' Series so the metric cells
# compare a Series with the forecasts.
# The isinstance guard keeps the cell re-runnable: once diff_test is already a
# Series, indexing it with 'Value' again would raise a KeyError.
if isinstance(diff_test, pd.DataFrame):
    diff_test = diff_test['Value']

## Evaluate the accuracy of the model using metrics such as MSE and MAPE

In [None]:
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error (MAPE), in percent.

    MAPE = mean(|y_true - y_pred| / |y_true|) * 100.

    Parameters
    ----------
    y_true : array-like
        Observed values; these are the denominators of the ratio.
    y_pred : array-like
        Forecast values, in the same order as ``y_true``.

    Returns
    -------
    float
        The MAPE as a percentage. The result is ``inf`` or ``nan`` if
        ``y_true`` contains zeros, and ``nan`` if either input contains NaN.

    Notes
    -----
    Inputs are compared element by element in positional order. pandas
    objects are converted to plain arrays first, so a forecast and an
    observation are paired by position even when their index labels differ
    (label alignment would otherwise yield NaN for non-matching labels).
    """
    # Flatten to 1-D so a one-column frame and a Series pair up element-wise
    # instead of broadcasting against each other.
    actual = np.asarray(y_true, dtype=float).ravel()
    forecast = np.asarray(y_pred, dtype=float).ravel()
    return np.mean(np.abs((actual - forecast) / actual)) * 100

# MAPE divides by its first argument (y_true), so the observed values go
# first and the AR forecasts second.
mean_absolute_percentage_error(diff_test, predictions)

In [None]:
from sklearn.metrics import mean_squared_error
from matplotlib import pyplot

# Mean squared error between the observed differenced values and the AR
# forecasts. Arguments follow sklearn's (y_true, y_pred) order; squared error
# is symmetric, so the value does not depend on the order.
error = mean_squared_error(diff_test, predictions)
print('Test MSE: %.3f' % error)

## Now let's build an MA model using the differenced data: Use order = (0,0,1)

In [None]:
# NOTE(review): the `statsmodels.tsa.arima_model` module has been deprecated
# in favour of `statsmodels.tsa.arima.model.ARIMA`; this import is expected to
# fail on recent releases -- confirm against the installed statsmodels version.
from statsmodels.tsa.arima_model import ARIMA
# order=(0,0,1): no AR terms, no differencing, one moving-average term.
model = ARIMA(diff_data,order=(0,0,1))
results_MA = model.fit()
# NOTE(review): k_ar is the autoregressive order, which is 0 for this
# specification, so the 'Lag' label is presumably misleading here -- confirm.
print('Lag: %s' % results_MA.k_ar)
print('Coefficients: %s' % results_MA.params)

## Make predictions (ONLY FOR THE NEXT 12 MONTHS )

## Using the model(trained in the previous step) to predict for next 12 months.

In [None]:
# Forecast one value per entry of diff_test. Positions are 0-based, so
# len(diff_data) is the first step after the training sample.
predictions_ma = results_MA.predict(start=len(diff_data), end=len(diff_data)+len(diff_test)-1, dynamic=False)
# NOTE: when run as a notebook cell only the cell's last expression is
# displayed, so this head() call produces no visible output.
predictions_ma.head()

# Print each MA forecast next to the observed differenced value.
# NOTE(review): predictions_ma[i] relies on integer keys being treated as
# positions, which depends on the index type of `predictions_ma` -- verify.
for i in range(len(predictions_ma)):
    print('predicted=%f, expected=%f' % (predictions_ma[i], diff_test.iloc[i]))

## Plotting the MA Model

In [None]:
# Overlay the last 24 training observations, the MA forecasts and the
# observed test values on one 15x10 inch figure.
plt.figure(figsize=(15, 10))
plt.plot(diff_data.iloc[-24:])
plt.plot(predictions_ma)
plt.plot(diff_test)
print('Plotting MA model')

## Evaluate the accuracy of the MA model using metrics such as MSE and MAPE

In [None]:
# MAPE divides by its first argument (y_true), so the observed values go
# first and the MA forecasts second.
mean_absolute_percentage_error(diff_test, predictions_ma)

In [None]:
from sklearn.metrics import mean_squared_error
from matplotlib import pyplot

# Mean squared error between the observed differenced values and the MA
# forecasts. Arguments follow sklearn's (y_true, y_pred) order; squared error
# is symmetric, so the value does not depend on the order.
error = mean_squared_error(diff_test, predictions_ma)
print('Test MSE: %.3f' % error)

## Using p = 11 and q = 2, build an ARMA model.

In [None]:
# NOTE(review): the `statsmodels.tsa.arima_model` module has been deprecated
# in favour of `statsmodels.tsa.arima.model.ARIMA`; this import is expected to
# fail on recent releases -- confirm against the installed statsmodels version.
from statsmodels.tsa.arima_model import ARMA
# order=(11,2): 11 autoregressive terms and 2 moving-average terms.
model = ARMA(diff_data,order=(11,2))
# NOTE(review): transparams=False presumably switches off the parameter
# transformation used during fitting -- confirm in the statsmodels docs.
model_arma = model.fit(transparams=False)
# k_ar is printed here as the autoregressive lag order of the fitted model.
print('Lag: %s' % model_arma.k_ar)
print('Coefficients: %s' % model_arma.params)

## Making predictions using ARMA model

In [None]:
# Forecast one value per entry of diff_test. Positions are 0-based, so
# len(diff_data) is the first step after the training sample.
predictions_arma = model_arma.predict(start=len(diff_data), end=len(diff_data)+len(diff_test)-1, dynamic=False)
# NOTE: when run as a notebook cell only the cell's last expression is
# displayed, so this head() call produces no visible output.
predictions_arma.head()

# Print each ARMA forecast next to the observed differenced value.
# NOTE(review): predictions_arma[i] relies on integer keys being treated as
# positions, which depends on the index type of `predictions_arma` -- verify.
for i in range(len(predictions_arma)):
    print('predicted=%f, expected=%f' % (predictions_arma[i], diff_test.iloc[i]))

In [None]:
# Overlay the last 24 training observations, the ARMA forecasts and the
# observed test values on one 15x10 inch figure.
plt.figure(figsize=(15, 10))
plt.plot(diff_data.iloc[-24:])
plt.plot(predictions_arma)
plt.plot(diff_test)
print('Plotting ARMA model')

## Evaluate the accuracy of the ARMA model using metrics such as MSE and MAPE

In [None]:
# MAPE divides by its first argument (y_true), so the observed values go
# first and the ARMA forecasts second.
mean_absolute_percentage_error(diff_test, predictions_arma)

In [None]:
from sklearn.metrics import mean_squared_error
from matplotlib import pyplot

# Mean squared error between the observed differenced values and the ARMA
# forecasts. Arguments follow sklearn's (y_true, y_pred) order; squared error
# is symmetric, so the value does not depend on the order.
error = mean_squared_error(diff_test, predictions_arma)
print('Test MSE: %.3f' % error)

## Question-10: Compare the AR, MA and ARMA models using the evaluation metrics and decide which model performs best.