# Implement basic smoothing

Implement moving average smoothing and exponential moving average smoothing.

In [None]:
# interactive figures
%matplotlib widget 

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

# ml training code and other analysis
from one_dimensional_time_series_forecasting import time_series_prediction
from one_dimensional_time_series_forecasting import hit_rate

# model evaluation metrics
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

# data preprocessing
from sklearn.preprocessing import normalize
from sklearn.preprocessing import MinMaxScaler

# Looking at the AirPassengers dataset

In [None]:
# Load the AirPassengers CSV into a DataFrame and show it as the cell output.
df = pd.read_csv(filepath_or_buffer='./test_data/AirPassengers.csv')
df

In [None]:
# Simple moving averages of '#Passengers' over 5, 10 and 15 consecutive rows,
# stored as the columns 'MA_5', 'MA_10' and 'MA_15' (added in that order).
for n_rows in (5, 10, 15):
    df[f'MA_{n_rows}'] = df['#Passengers'].rolling(window=n_rows).mean()

df

In [None]:
# visualize differences
# subplots=False puts all plotted columns of df on a single set of axes, so
# (when run after the moving-average cell) the raw series and its moving
# averages overlay each other.
df.plot(subplots=False)
# Adjust the padding of the current figure so labels are not clipped.
plt.tight_layout()

The lag created by a moving average is easily visible above.

In [None]:
# Global forecasting settings shared by the cells below.
window_length = 15  # passed as the third argument to time_series_prediction
split = 44          # passed to train_test_split(split=...)
ma_window = 5       # rolling-window size used to build the smoothed input series

# Forecast with the normal model, i.e. no feature engineering

In [None]:

# Baseline forecast: run every model on the raw '#Passengers' series (no smoothing).
# NOTE(review): time_series_prediction comes from the project module
# one_dimensional_time_series_forecasting; the meaning of the trailing positional
# argument `1` is not visible in this file — confirm against the class definition.

# initialize class object
normal = time_series_prediction(df['Month'],df['#Passengers'],window_length,1)
normal.sliding_window_1(verbose=0) # time series to supervised ML problem
normal.train_test_split(split=split) # testing and training dataset split
normal.test_train_plot(ylabel='#Passengers')    # visualize training split

# perform some prediction tasks
# (the keyword really is spelled `model_tunning`; it is used consistently in this file)
normal.linear_regression()
normal.support_vector_machine(model_tunning=True)
normal.neural_net_mlp(model_tunning=True)
normal.naive_model()

# visualize results
normal.vis_results_time_series(second_plot='error',ylabel='#Passengers')

# Keep the tabulated results: the hit-rate cell reads its 'date', 'Value',
# 'Linear', 'SVM', 'NN' and 'Naive' columns.
tabulated_results_0 = normal.results()
tabulated_results_0.plot()
display(tabulated_results_0)

In [None]:
# Movement accuracy (hit rate) of each model's predictions on the raw series.
# NOTE(review): hit_rate is a project helper; its exact definition of a "hit"
# is not visible in this file.

# Only rows from position split + window_length onward are evaluated.
test_rows = tabulated_results_0.iloc[split + window_length:]

# data to feed to hit_rate function:
dates = test_rows['date']
original_values = test_rows['Value']
lin_predictions = test_rows['Linear']
svm_predictions = test_rows['SVM']
nn_predictions = test_rows['NN']
naive_predictions = test_rows['Naive']

# hit rate calculations, one labelled report per model
print('Linear Regression:')
df_lin = hit_rate(dates, original_values, lin_predictions)

print('SVM:')
df_svm = hit_rate(dates, original_values, svm_predictions)

print('NN:')
df_nn = hit_rate(dates, original_values, nn_predictions)

print('Naive:')
df_naive = hit_rate(dates, original_values, naive_predictions)

# Forecast model with smoothed inputs

In [None]:
# Rebuild the input frame from disk so only the smoothing column used for
# forecasting is present.
df = pd.read_csv('./test_data/AirPassengers.csv')
display(df)

# Moving average over ma_window rows. The column is always named 'MA_5',
# whatever value ma_window holds.
df['MA_5'] = df['#Passengers'].rolling(window=ma_window).mean()

# Discard rows containing NaN (the leading rows where the rolling window is
# not yet full); the original index labels are kept.
df = df.dropna()
df


In [None]:
# Forecast on the smoothed series, then score the predictions against the raw
# (unsmoothed) passenger counts.

# Align the test period with the unsmoothed model. rolling(window=ma_window)
# yields NaN for the first ma_window - 1 rows, and dropna() removed exactly
# those rows (assuming the CSV itself has no missing values), so every
# remaining row sits ma_window - 1 positions earlier than in the original
# frame. Shifting by ma_window instead would start the test period one row
# too early.
smoothed_split = split - (ma_window - 1)
# The unsmoothed hit-rate cell evaluates rows from split + window_length
# onward; this is the matching position in the shortened frame.
test_start = smoothed_split + window_length

# initialize class object
smoothed = time_series_prediction(df['Month'], df['MA_5'], window_length, 1)
smoothed.sliding_window_1(verbose=0)  # time series to supervised ML problem
smoothed.train_test_split(split=smoothed_split)  # want to test on the same set as the normal forecasting problem above
smoothed.test_train_plot()  # visualize training split

# perform some prediction tasks
smoothed.linear_regression()
smoothed.support_vector_machine(model_tunning=True)
smoothed.neural_net_mlp(model_tunning=True)
smoothed.naive_model()

# visualize results
smoothed.vis_results_time_series(second_plot='error')

# evaluation metric against original data not smoothed data

# add all these results for forecast window period together
# NOTE(review): despite the 'Inverted ...' labels, no inverse transform is
# applied in this cell; the predictions are stored as the models produced them.
df_results = pd.DataFrame(columns=['date','Original Values','Inverted linear','Inverted svm','Inverted NN'])
df_results['date'] = df['Month'].iloc[test_start:]
df_results['Original Values'] = df['#Passengers'].iloc[test_start:]
df_results['Inverted linear'] = smoothed.linear_reg_predictions
df_results['Inverted svm'] = smoothed.svm_predictions
df_results['Inverted NN'] = smoothed.neural_net_predictions

# RMSE of this method (square root of the MSE against the raw values)
mse_lin = mean_squared_error(df_results['Original Values'],df_results['Inverted linear'])
mse_svm = mean_squared_error(df_results['Original Values'],df_results['Inverted svm'])
mse_nn = mean_squared_error(df_results['Original Values'],df_results['Inverted NN'])

print(f'RMSE linear: {mse_lin**0.5}')
print(f'RMSE svm: {mse_svm**0.5}')
print(f'RMSE nn: {mse_nn**0.5}')

# df_results.plot()
tabulated_results_1 = smoothed.results()
tabulated_results_1.plot()
display(tabulated_results_1)


In [None]:
# Movement accuracy (hit rate) of the smoothed-input models, measured against
# the raw values collected in df_results.

# inputs shared by every hit_rate call
dates = df_results['date']
original_values = df_results['Original Values']

# hit rate calculations, one labelled report per model
lin_predictions = df_results['Inverted linear']
print('Linear Regression:')
df_lin = hit_rate(dates, original_values, lin_predictions)

svm_predictions = df_results['Inverted svm']
print('SVM:')
df_svm = hit_rate(dates, original_values, svm_predictions)

nn_predictions = df_results['Inverted NN']
print('NN:')
df_nn = hit_rate(dates, original_values, nn_predictions)
