### Import Packages

In [None]:
import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt
from stockstats import StockDataFrame

from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn.svm import SVR

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets, layers, models

from math import floor

### Set the data source path

In [None]:
# Components of the data source path
interval = "daily"
region = "us"
ex_product = "nasdaq stocks"
section = "1"
stock = "aapl"  # Apple Inc. is the stock used for training

# Layout: test_data/<interval>/<region>/<product>/<section>/<stock>.<region>.txt
data_path = "/".join([
    "test_data",
    interval,
    region,
    ex_product,
    section,
    stock + "." + region + ".txt",
])

# Price columns of interest (open, low, high, close)
column_to_use = ["OPEN","LOW","HIGH","CLOSE"]



### Load the stock data

In [None]:
# Read the raw file, clean up the header names, index the rows by date and
# discard the columns that are not needed.
ori_data = (
    pd.read_csv(data_path, sep=",")
    # Strip the first and last character of every header
    # (presumably the file uses headers such as "<DATE>" -- verify against the data file).
    .rename(columns=lambda header: header[1:-1])
    .set_index("DATE")
    .drop(columns=["PER", "TIME", "TICKER", "OPENINT"])
)

# The five remaining columns get lower-case names
ori_data.columns = ["open","high","low","close","volume"]

In [None]:
# Use the stockstats package to generate additional features.
# Wrapping the price frame in a StockDataFrame and indexing it with a list of
# column names yields the five price/volume columns plus the requested
# indicator columns (presumably Bollinger bands, MACD and RSI with windows
# 11/14/21, going by the stockstats column naming -- confirm in its docs).
x = StockDataFrame(ori_data)
data = x[['open','high','low','close','volume',
          'boll', 'boll_ub', 'boll_lb',
          'macd', 'macdh', 'macds',
          'rsi_11', 'rsi_14', 'rsi_21']]



In [None]:
# Display the selected feature frame
data

### Split the train and test data

In [None]:
# Split the train and test data

def custom_split(data,start,end):
    """Return the rows of ``data`` whose index lies within ``[start, end]``.

    Both bounds are inclusive and are compared directly against
    ``data.index``, so they must be comparable with the index values.
    """
    not_before_start = data.index >= start
    not_after_end = data.index <= end
    return data[not_before_start & not_after_end]

In [None]:
# Inclusive index windows for the training, validation and test sets
train_X, valid_X, test_X = (
    custom_split(data, start=first, end=last)
    for first, last in (
        (20130101, 20171031),
        (20171101, 20181231),
        (20190101, 20201231),
    )
)

In [None]:
# Number of rows in the test split
len(test_X)

### Label the target result (opening price on 11th day)

In [None]:
# Use a window of 10 days of price data to predict the opening price of the
# following (11th) day
num_day_to_predict = 10

In [None]:
def produce_result_target_price(X,num_day = 10,result_col_name = "result_price"):
    """Build the target column: the next row's ``open`` value.

    The returned frame shares the index of ``X`` and has a single column
    ``result_col_name``.  The row at position ``p`` holds ``X["open"]`` at
    position ``p + 1`` for every ``p`` from ``num_day - 1`` up to the
    second-to-last row.  The first ``num_day - 1`` rows (no complete window
    ends there) and the last row (no following row) stay NaN.

    Parameters
    ----------
    X : DataFrame with an ``open`` column.
    num_day : window length in rows; must be at least 1.
    result_col_name : name of the single output column.

    Raises
    ------
    ValueError
        If ``num_day`` is smaller than 1.
    """
    if num_day < 1:
        raise ValueError("num_day must be at least 1")

    y = pd.DataFrame(np.nan, index=X.index, columns=[result_col_name])

    n_rows = len(X)
    if n_rows > num_day:
        opens = X.loc[:, "open"].to_numpy(dtype=float)
        # Honour num_day (the window length used to be hard-coded as 10) and
        # fill all targets with one positional slice assignment.
        y.iloc[num_day - 1:n_rows - 1, 0] = opens[num_day:]

    return y

In [None]:
# Label every split with the next-row opening price
train_y, valid_y, test_y = (
    produce_result_target_price(subset, num_day_to_predict)
    for subset in (train_X, valid_X, test_X)
)

In [None]:
# Inspect the first 20 target rows of the test split
test_y.head(20)

### Transform the 10-day data into one vector

In [None]:
def transform_x_data_to_one_vector(X,num_day = 10):
    """Flatten every ``num_day``-row window of ``X`` into a single row.

    The result shares the index of ``X``.  Its columns are named
    ``"<column>-<offset>"`` for ``offset`` in ``0 .. num_day - 1``, grouped by
    offset and, within an offset, in the column order of ``X``.  The row at
    position ``p`` holds the values of rows ``p .. p + num_day - 1`` of ``X``.
    Rows where a complete window does not fit (the last ``num_day - 1`` rows)
    are left entirely NaN.

    Parameters
    ----------
    X : DataFrame with numeric columns whose names are strings.
    num_day : window length in rows.
    """
    columns = list(X.columns)
    n_cols = len(columns)
    col_name = [col+"-"+str(offset) for offset in range(num_day) for col in columns]

    # Number of positions at which a complete window fits (honours num_day;
    # the window length used to be hard-coded).
    n_windows = max(len(X) - num_day + 1, 0)

    values = X.to_numpy(dtype=float)
    flat = np.full((len(X), num_day * n_cols), np.nan)
    for offset in range(num_day):
        # Copy the whole block for this offset in one step; no need to parse
        # the offset back out of the generated column names.
        flat[:n_windows, offset * n_cols:(offset + 1) * n_cols] = values[offset:offset + n_windows]

    return pd.DataFrame(flat, index=X.index, columns=col_name)

In [None]:
# Flatten the sliding windows of every split into one row per window
train_X_10, valid_X_10, test_X_10 = (
    transform_x_data_to_one_vector(subset, num_day_to_predict)
    for subset in (train_X, valid_X, test_X)
)

In [None]:
# Inspect the last 20 flattened rows of the test split
test_X_10.tail(20)

### Drop out rows with NaN

In [None]:
def drop_nan_row_y(y,num_day = 10):
    """Drop the first ``num_day - 1`` index labels and the last label of ``y``.

    Returns a new object; ``y`` itself is not modified.
    """
    labels = y.index
    to_remove = []
    for position in range(num_day - 1):
        to_remove.append(labels[position])
    to_remove.append(labels[-1])
    return y.drop(to_remove)

def drop_nan_row_X_10(X,num_day = 10):
    """Drop the last ``num_day`` index labels of ``X``.

    Returns a new object; ``X`` itself is not modified.
    """
    labels = X.index
    trailing = []
    for steps_back in range(1, num_day + 1):
        trailing.append(labels[-steps_back])
    return X.drop(trailing)

In [None]:
# Trim the flattened feature frames of every split ...
new_train_X_10, new_valid_X_10, new_test_X_10 = (
    drop_nan_row_X_10(windows, num_day_to_predict)
    for windows in (train_X_10, valid_X_10, test_X_10)
)

# ... and trim the matching target frames
new_train_y, new_valid_y, new_test_y = (
    drop_nan_row_y(targets, num_day_to_predict)
    for targets in (train_y, valid_y, test_y)
)



In [None]:
# Keep a handle on the current training frames for reverting the prediction
# result later. NOTE: these are plain references, not copies -- they stay
# valid only as long as the frames are re-bound rather than modified in place.
old_train_X_10 = new_train_X_10
old_train_y = new_train_y

### Normalize the data (features per column, targets per row)

In [None]:
def normalize_data_by_row(X,y):
    """Standardise the feature frame ``X`` and the target frame ``y``.

    * ``X`` is standardised per column: each column has its own mean
      subtracted and is divided by its own ``std`` (note that, despite the
      function name, this is column-wise, not row-wise).
    * ``y["result_price"]`` is standardised per row, using the ``np.mean`` and
      ``np.std`` of the columns of ``X`` whose name contains ``"open"``.  Rows
      of ``X`` and ``y`` are paired by position; the index of ``y`` is kept.

    Returns a tuple ``(norm_X, norm_y)`` where ``norm_y`` is a DataFrame with
    the single column ``"result_price"``.
    """
    # Feature matrix: column-wise z-score
    column_mean = X.mean(axis=0)
    column_std = X.std(axis=0)
    norm_X = (X - column_mean) / column_std

    # Row-wise statistics of the open prices inside each window
    open_cols = [name for name in X.columns if "open" in name]
    open_window = X.loc[:, open_cols]
    target = y.loc[:,"result_price"]

    row_mean = np.mean(open_window,axis=1)
    row_std = np.std(open_window,axis=1)
    # Re-label with the target index so the subtraction pairs rows by position
    row_mean.index = target.index
    row_std.index = target.index

    scaled_target = (target - row_mean) / row_std
    norm_y = pd.DataFrame(scaled_target, columns=["result_price"])

    return (norm_X,norm_y)

In [None]:
# Normalise each split with its own statistics
(
    (new_train_X_10, new_train_y),
    (new_valid_X_10, new_valid_y),
    (new_test_X_10, new_test_y),
) = (
    normalize_data_by_row(features, targets)
    for features, targets in (
        (new_train_X_10, new_train_y),
        (new_valid_X_10, new_valid_y),
        (new_test_X_10, new_test_y),
    )
)

### Model Training and Fitting

In [None]:
# Use Support Vector Regressor to fit the data

# C : Regularization parameter. The strength of the regularization is inversely proportional to C.
C = 100

# gamma : Kernel coefficient
gamma = 0.00001

# radial basis function kernel
kernel="rbf"

svr = SVR(kernel=kernel, C=C, gamma=gamma)
# The target is a single-column DataFrame; flatten it to the 1-D shape that
# fit() expects instead of passing an (n_samples, 1) column vector.
svr.fit(new_train_X_10, np.ravel(new_train_y))

### Model Testing

In [None]:

# Score the fitted model on the validation split (SVR.score is the
# coefficient of determination per the scikit-learn regressor API) ...
svr_confidence = svr.score(new_valid_X_10, new_valid_y)
# ... and keep the (still normalised) validation predictions
svr_prediction = svr.predict(new_valid_X_10)
print("svr confidence: ", svr_confidence)

In [None]:
# Undo the target scaling applied by normalize_data_by_row: the targets were
# standardised with the row-wise mean / population std (np.std, ddof=0) of the
# raw "open" columns of each window, so the same statistics must be taken from
# the un-normalised validation windows, not from the normalised feature frame.
raw_valid_X_10 = drop_nan_row_X_10(valid_X_10, num_day_to_predict)
open_cols = [col for col in raw_valid_X_10.columns if "open" in col]
open_mean = raw_valid_X_10.loc[:, open_cols].mean(axis=1)
open_std = raw_valid_X_10.loc[:, open_cols].std(axis=1, ddof=0)

reverted_predict_y = open_mean + open_std*svr_prediction
# Label each prediction with the index of the target row it corresponds to
# (rows of the feature and target frames are paired by position).
reverted_predict_y.index = new_valid_y.index
reverted_predict_y

In [None]:
# Display the predictions after reverting the normalization
reverted_predict_y

### Plot the graph

In [None]:
# reverted_predict_y holds predictions for the validation split, so the
# "Actual" curve must be the un-normalised validation targets: valid_y with
# the same rows removed as for the frame the model was scored on.
actual_y = drop_nan_row_y(valid_y, num_day_to_predict)

plt.plot(actual_y.index, actual_y["result_price"], label = "Actual", color = 'Black')
plt.plot(reverted_predict_y.index, reverted_predict_y, label = "Predicted", color = 'Orange')
plt.xlabel("timestamp")
plt.ylabel("Price (USD)")
plt.title("Prediction of "+stock.upper()+" using SVR")

plt.legend()
# The file name records the hyper-parameters and the validation score
plt.savefig("plot/SVR/"+stock.upper()+"-day("+str(num_day_to_predict)+")kernel("+kernel+")C("+str(C)+")gamma("+str(gamma)+")confidence("+str(round(svr_confidence,5))+").jpg",
            dpi=600)
plt.show()




