# SVR method

## Import data

In [1]:
#import data
import pandas as pd
JPY=pd.read_csv('JPY.csv',sep=',',index_col=0)
JPY.head(5)

Unnamed: 0_level_0,JPY/USD,diff_1,diff_2
daily,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2016-01-04,119.45,,
2016-01-05,119.06,-0.39,
2016-01-06,118.47,-0.59,-0.2
2016-01-07,117.64,-0.83,-0.24
2016-01-08,117.25,-0.39,0.44


In [2]:
JPY.shape

(592, 3)

## Data prepocessing

In [3]:
#Define input & output. Notice that input is lagged up to 5 period.
# X=(et-1,et-2,et-3,et-4,et-5), Y =et
import numpy as np
#Largest Forecasting period belongs to 7~ 16, choose 7 as forecasting period
t=7
X5=JPY.iloc[1:-5,1].values.reshape(-1,1)
X4=JPY.iloc[2:-4,1].values.reshape(-1,1)
X3=JPY.iloc[3:-3,1].values.reshape(-1,1)
X2=JPY.iloc[4:-2,1].values.reshape(-1,1)
X1=JPY.iloc[5:-1,1].values.reshape(-1,1)
X=np.hstack((X1,X2,X3,X4,X5))
y=JPY.iloc[6:,1].values
X=np.array(X)
y=np.array(y).reshape(-1,1)
X.shape

(586, 5)

In [4]:
#Spilt data into training set and test set. 
#Because these is time series data, we canot randomly spilt them. 
X_train=X[0:-t]
y_train=y[0:-t]
X_test=X[-t:]
y_test=y[-t:]


## SVR

In [5]:
from sklearn.svm import SVR
from sklearn import preprocessing
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error

In [6]:
# Preprocessing data between 0 and 1.
min_max_scaler=preprocessing.MinMaxScaler()
X_train_minmax=min_max_scaler.fit_transform(X_train)
X_test=min_max_scaler.transform(X_test)
y_train=min_max_scaler.fit_transform(y_train)
y_test=min_max_scaler.transform(y_test)
   

In [7]:
#Define SVR function, using different kernel function
#The parameters are found in grid search.See code at the end of the notebook.
#Parameters are find if they perform best in validation set.Each validation set is 7 days which is the same length as test set. 
#We move the windows 10 times to get 10 validation set. 
svr_rbf=SVR(kernel='rbf',C=0.1,gamma=100)
svr_linear=SVR(kernel='linear',C=0.001)
svr_poly=SVR(kernel='poly',C=0.001,degree=7)

In [8]:
# Using SVR getting the forecast value
y_rbf=svr_rbf.fit(X_train,y_train).predict(X_test)
y_linear=svr_linear.fit(X_train,y_train).predict(X_test)
y_poly=svr_poly.fit(X_train,y_train).predict(X_test)

  y = column_or_1d(y, warn=True)


## Prediction

In [9]:
# Define the output prediction metrics
prediction=np.zeros(shape=(t+1,8))
linear=min_max_scaler.inverse_transform(y_linear.reshape(-1,1))
rbf=min_max_scaler.inverse_transform(y_rbf.reshape(-1,1))
poly=min_max_scaler.inverse_transform(y_poly.reshape(-1,1))
prediction[1:,2:5]=np.hstack((linear,rbf,poly))
prediction[:,0]=JPY.iloc[-(t+1):,0].values
prediction[:,1]=np.log(JPY.iloc[-(t+1):,0].values)
prediction[0,1]
for j in range (5,8):
    prediction[1,j]=prediction[0,1]+prediction[1,j-3]
for j in range (5,8):
    for i in range (2,t+1):
        prediction[i,j]=prediction[i,j-3]+ prediction[i-1,j]
prediction[1:,5:8]=np.e**(prediction[1:,5:8])

In [10]:
#output the prediction 
np.savetxt('prediction_SVR.csv',prediction,delimiter=',',
           header='et,lnet,linear,rbf,poly,et-linear,et-rbf,et-poly',
          fmt='%6f')

## The accuracy index

In [11]:
# Define accuracy index to test the result  
prediction=pd.read_csv('prediction_SVR.csv',sep=',')

In [12]:
y_real=prediction.iloc[1:,0].values
y_pre_linear=prediction.iloc[1:,5].values
y_pre_poly=prediction.iloc[1:,7].values
y_pre_rbf=prediction.iloc[1:,6].values

In [13]:
#1.Mean squrad error
MSE_linear=np.sum(np.abs(y_real-y_pre_linear))/t
MSE_poly=np.sum(np.abs(y_real-y_pre_poly))/t
MSE_rbf=np.sum(np.abs(y_real-y_pre_rbf))/t

In [14]:
#2.Mean abosolute error
MAE_linear=np.sum((y_real-y_pre_linear)*(y_real-y_pre_linear))/t
MAE_poly=np.sum((y_real-y_pre_poly)*(y_real-y_pre_poly))/t
MAE_rbf=np.sum((y_real-y_pre_rbf)*(y_real-y_pre_rbf))/t

In [15]:
#3.Root mean abosolute error
RMSE_linear=np.sqrt(MSE_linear)
RMSE_poly=np.sqrt(MSE_poly)
RMSE_rbf=np.sqrt(MSE_rbf)

In [16]:
#4.Mean abosulte percent error
MAPE_linear=1/t*np.sum(np.abs((y_real-y_pre_linear)/y_real))
MAPE_poly=1/t*np.sum(np.abs((y_real-y_pre_poly)/y_real))
MAPE_rbf=1/t*np.sum(np.abs((y_real-y_pre_rbf)/y_real))


In [18]:
#5.  1-MAPE goodness of fit
U_linear=1-MAPE_linear
U_poly=1-MAPE_poly
U_rbf=1-MAPE_rbf

In [19]:
index=np.zeros(shape=(3,5))
index[[0]]=[MSE_linear,MAE_linear,RMSE_linear,MAPE_linear,U_linear]
index[[1]]=[MSE_poly,MAE_poly,RMSE_poly,MAPE_poly,U_poly]
index[[2]]=[MSE_rbf,MAE_rbf,RMSE_rbf,MAPE_rbf,U_rbf]

In [20]:
np.savetxt('index_SVR.csv',index,delimiter=',',
           header='MSE,MAE,RMSE,MAPE,U',fmt='%6f')

## Grid search

In [79]:
#Following code has to run about 30 minutes!!
# Grid search different parameters gamma, C, degree
# Moving windows 10 periods to find the best parameters.
for gamma in [0.001,0.01,0.1,1,10,100,1000]:
    for C in [0.0001,0.001,0.01,0.1,1,10,100]:
        for degree in [2,3,4,5,6,7,8]:
            UL=0
            UP=0
            UR=0
            for k in [1,2,3,4,5,6,7,8,9,10]:
                #spilt training set into trainging and validation set.
                # The validation set is used to find the best parameters.
                X_vali=X_train[-t-k:-k]
                y_vali=y_train[-t-k:-k]
                X_tra=X_train[:-t-k]
                y_tra=y_train[:-t-k]
                svr_rbf=SVR(kernel='rbf',C=C,gamma=gamma)
                svr_linear=SVR(kernel='linear',C=C)
                svr_poly=SVR(kernel='poly',C=C,degree=degree)
            
                y_rbf=svr_rbf.fit(X_tra,y_tra).predict(X_vali)
                y_linear=svr_linear.fit(X_tra,y_tra).predict(X_vali)
                y_poly=svr_poly.fit(X_tra,y_tra).predict(X_vali)
            
                prediction=np.zeros(shape=(t+1,8))
                linear=min_max_scaler.inverse_transform(y_linear.reshape(-1,1))
                rbf=min_max_scaler.inverse_transform(y_rbf.reshape(-1,1))
                poly=min_max_scaler.inverse_transform(y_poly.reshape(-1,1))
                prediction[1:,2:5]=np.hstack((linear,rbf,poly))
                prediction[:,0]=JPY.iloc[-(2*t+1)-k:-k-t,0].values
                prediction[:,1]=np.log(JPY.iloc[-(2*t+1)-k:-t-k,0].values)
                prediction[0,1]
                for j in range (5,8):
                    prediction[1,j]=prediction[0,1]+prediction[1,j-3]
                for j in range (5,8):
                    for i in range (2,t+1):
                        prediction[i,j]=prediction[i,j-3]+ prediction[i-1,j]
                prediction[1:,5:8]=np.e**(prediction[1:,5:8])
            
                y_real=prediction[1:,0]
                y_pre_linear=prediction[1:,5]
                y_pre_poly=prediction[1:,7]
                y_pre_rbf=prediction[1:,6]
            
                MAPE_linear=1/t*np.sum(np.abs((y_real-y_pre_linear)/y_real))
                MAPE_poly=1/t*np.sum(np.abs((y_real-y_pre_poly)/y_real))
                MAPE_rbf=1/t*np.sum(np.abs((y_real-y_pre_rbf)/y_real))
                u_l=1-MAPE_linear
                u_p=1-MAPE_poly
                u_r=1-MAPE_rbf
                if k==10:
                    U_linear=(UL+u_l)/10
                    U_poly=(UP+u_p)/10
                    U_rbf=(UR+u_r)/10
                else  :
                    UL=UL+u_l
                    UP=UP+u_p
                    UR=UR+u_r
                
            if U_linear > best_U_linear:
                best_U_linear=U_linear
                best_linear_parameters = {'C':C}
            if U_poly > best_U_poly:
                best_U_poly=U_poly
                best_poly_parameters = {'degree':degree,'C':C}
            if U_rbf > best_U_rbf:
                best_U_rbf=U_rbf
                best_rbf_parameters = {'gamma':gamma,'C':C}

  y = column_or_1d(y, warn=True)


In [80]:
# Best parameters
print("Best rbf score:{:.7f}".format(best_U_rbf))
print("Best rbf parameters:{}".format(best_rbf_parameters))

Best rbf score:0.9312975
Best rbf parameters:{'gamma': 100, 'C': 0.1}


In [81]:
print("Best linear score:{:.7f}".format(best_U_linear))
print("Best linear parameters:{}".format(best_linear_parameters))

Best linear score:0.9084192
Best linear parameters:{'C': 0.0001}


In [82]:
print("Best poly score:{:.7f}".format(best_U_poly))
print("Best poly parameters:{}".format(best_poly_parameters))

Best poly score:0.9472672
Best poly parameters:{'degree': 7, 'C': 0.001}
