In [6]:
pip install pandas_datareader

Collecting pandas_datareader
  Downloading pandas_datareader-0.9.0-py3-none-any.whl (107 kB)

Installing collected packages: pandas-datareader
Successfully installed pandas-datareader-0.9.0


In [95]:
# historical VaR
import pandas as pd
import numpy as np
import pandas_datareader.data as web
from sklearn.preprocessing import StandardScaler, MinMaxScaler

## 1 (a)

In [105]:
stock_list = ['AAPL', 'HD', 'JNJ', 'JPM', 'MSFT', 'UNH', 'V', 'XOM']
stocks = []
for stock in stock_list:
    d = web.DataReader(name=stock, data_source='yahoo',
                  start='2019-01-02', end='2020-12-31')
    stocks.append(d)
data = {stock_list[i]: stocks[i] for i in range(len(stock_list))}
dji = web.DataReader(name='^dji', data_source='yahoo',
                  start='2019-01-02', end='2020-12-31')

##  (b)

In [209]:
def preprocess():
    x_train = []
    x_test = []
    
    y = dji.fillna('ffill')
    r = np.log(y['Adj Close']/y['Adj Close'].shift(1))
    y['log_return'] = r
    y['label'] = (r>0)*1 - (r<=0)*1
    y = y.loc['2019-01-04':,:]['label']
    
    for a in stock_list:
        data[a] = data[a].fillna('ffill')
        r = np.log(data[a]['Adj Close']/data[a]['Adj Close'].shift(1))
        data[a]['log_return'] = r
        d = data[a].loc['2019-01-04':,:]['log_return']
        d_train = d[:int(len(d)*0.8)]
        d_test = d[int(len(d)*0.8):-1]
        x_train.append(np.array(d_train))
        x_test.append(np.array(d_test))
        
    #train test split
    y_train = np.array(y[1:int(len(y)*0.8)+1])
    y_test = np.array(y[int(len(y)*0.8)+1:])
    
    x_train = np.array(x_train).T
    x_test = np.array(x_test).T
    return x_train, x_test, y_train, y_test

In [210]:
x_train, x_test, y_train, y_test = preprocess()

In [211]:
scaler = StandardScaler()
scaler.fit(x_train)
X_train = scaler.transform(x_train) # returns scaled dataset
X_test = scaler.transform(x_test)

In [212]:
print(X_train.shape,X_test.shape, y_train.shape, y_test.shape)

(402, 8) (100, 8) (402,) (100,)


## (c)

In [242]:
from sklearn import svm

model = svm.SVC(C=1.0, kernel='rbf', gamma=1.0)
model.fit(X_train, y_train)
prediction = model.predict(X_train)

In [243]:
model.score(X_train, y_train)

0.8681592039800995

## (d)

In [254]:
from sklearn.model_selection import cross_val_score,TimeSeriesSplit
tss = TimeSeriesSplit(n_splits=5)
scores = cross_val_score(model, X_train, y_train, cv=tss)
scores.mean()

0.5791044776119403

## (e)

In [256]:
# Grid search: seek the best model by changing hyperparameter values
C = [0.1,1.0,10,100]
gamma = [0.1,0.2,0.3,0.4,0.5,1, 5, 10]
from sklearn.model_selection import GridSearchCV
parameter_values = [{'C': C, 'gamma': gamma}]
grid_search = GridSearchCV(model, parameter_values, cv=tss)
grid_search.fit(X_train, y_train)
grid_search.best_params_, grid_search.best_score_

({'C': 1.0, 'gamma': 1}, 0.5791044776119403)

In [246]:
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

In [249]:
# grid_search.cv_results_

## (f)

In [350]:
dji_test = dji[-100:]
dji_test['pred_label'] = y_pred
dji_test['trade_price'] = dji_test['Adj Close'].shift(1)
dji_trade = dji_test.loc['2020-10-02':,:]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dji_test['pred_label'] = y_pred
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dji_test['trade_price'] = dji_test['Adj Close'].shift(1)


In [351]:
dji_trade

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close,pred_label,trade_price
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-10-02,27861.429688,27382.939453,27536.390625,27682.810547,392770000,27682.810547,1,27816.900391
2020-10-05,28162.640625,27825.419922,27825.419922,28148.640625,318210000,28148.640625,1,27682.810547
2020-10-06,28354.480469,27728.029297,28214.240234,27772.759766,435030000,27772.759766,-1,28148.640625
2020-10-07,28369.660156,27971.359375,27971.359375,28303.460938,328750000,28303.460938,1,27772.759766
2020-10-08,28459.130859,28265.560547,28348.859375,28425.509766,314750000,28425.509766,-1,28303.460938
...,...,...,...,...,...,...,...,...
2020-12-24,30209.669922,30099.300781,30155.919922,30199.869141,145570000,30199.869141,1,30129.830078
2020-12-28,30525.560547,30283.230469,30283.230469,30403.970703,302490000,30403.970703,1,30199.869141
2020-12-29,30588.789062,30274.240234,30492.070312,30335.669922,357610000,30335.669922,-1,30403.970703
2020-12-30,30525.349609,30393.039062,30415.089844,30409.560547,291890000,30409.560547,1,30335.669922


In [364]:
status = 0 ## hold nothing
buy_price = 0
sell_price = 0
capital = 10000
for i in range(0,len(dji_trade)):
    action = 0
    rr = 0
    if status == 0:
        if dji_trade.iloc[i,6] == 1:
            print('buy')
            status = 1
            buy_price = dji_trade.iloc[i,7]
            action = 1
            rr = 0
        else:
            pass

    
    elif status == 1:
        if dji_trade.iloc[i,6] == 1:
            pass
        else:
            status = 0
            sell_price = dji_trade.iloc[i,7]
            rate = sell_price/buy_price
            capital = capital*rate
            action = 1
            print('sell')
    if action == 1:
        print('day',i)
        print(capital)
        print('-----')
if status != 0:
    capital = dji_trade.iloc[-1,5]/dji_trade.iloc[-1,7]*capital
    
print('final_capital',capital)

buy
day 0
10000
-----
sell
day 2
10119.258518999768
-----
buy
day 3
10119.258518999768
-----
sell
day 4
10312.624334995708
-----
buy
day 5
10312.624334995708
-----
sell
day 11
10378.217900489466
-----
buy
day 12
10378.217900489466
-----
sell
day 13
10419.947005707887
-----
buy
day 14
10419.947005707887
-----
sell
day 18
10143.802113491918
-----
buy
day 19
10143.802113491918
-----
sell
day 29
11244.506827039806
-----
buy
day 30
11244.506827039806
-----
sell
day 35
11400.359323198607
-----
buy
day 36
11400.359323198607
-----
sell
day 39
11637.6075036947
-----
buy
day 40
11637.6075036947
-----
sell
day 42
11603.97160732433
-----
buy
day 43
11603.97160732433
-----
sell
day 50
11667.101975649894
-----
buy
day 51
11667.101975649894
-----
sell
day 52
11799.066911352187
-----
buy
day 53
11799.066911352187
-----
sell
day 57
11744.666611182905
-----
buy
day 58
11744.666611182905
-----
sell
day 60
11851.527162233404
-----
buy
day 61
11851.527162233404
-----
sell
day 62
11880.394721495393
-----
fi

In [365]:
capital

11880.394721495393

## (g) buy and hold, buy at 09/30/2020 adj close, sell at 12/31/2020 adj close

In [359]:
buy_n_hold = dji_test.loc['2020-12-31','Adj Close'] / dji_test.loc['2020-09-30','Adj Close'] *10000
buy_n_hold

11016.77770958428

#### Both strategies made money. Trading strategy using SVM turned 10000 capital into 11880.40, while Buy_and_Hold strategy turned 10000 into 11016.78. From the result, the trading strategy is better and almost doubled the return of the buy and hold strategy. However, this is based on a small sample size, more market data is yet to be tested out.