## Importing libraries, `amzn` dataset and adding 5 lagging features `prev_`

In [55]:
import pandas as pd
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

In [56]:
df = pd.read_csv('./Stocks/amzn.us.txt', delimiter=',')

In [57]:
for x in range(1,6):
    df[f"prev_{x}"] = df['Close'].shift(x)

df.dropna(inplace=True)
df.reset_index(drop=True, inplace=True)
df

Unnamed: 0,Date,Open,High,Low,Close,Volume,OpenInt,prev_1,prev_2,prev_3,prev_4,prev_5
0,1997-05-23,1.41,1.52,1.33,1.50,15937200,0,1.40,1.43,1.64,1.71,1.73
1,1997-05-27,1.51,1.65,1.46,1.58,8697600,0,1.50,1.40,1.43,1.64,1.71
2,1997-05-28,1.62,1.64,1.53,1.53,4574400,0,1.58,1.50,1.40,1.43,1.64
3,1997-05-29,1.54,1.54,1.48,1.51,3472800,0,1.53,1.58,1.50,1.40,1.43
4,1997-05-30,1.50,1.51,1.48,1.50,2594400,0,1.51,1.53,1.58,1.50,1.40
...,...,...,...,...,...,...,...,...,...,...,...,...
5143,2017-11-06,1109.15,1125.41,1108.77,1120.66,3331738,0,1111.60,1094.22,1103.68,1105.28,1110.85
5144,2017-11-07,1124.74,1130.60,1117.50,1123.17,2684443,0,1120.66,1111.60,1094.22,1103.68,1105.28
5145,2017-11-08,1122.82,1135.54,1119.11,1132.88,2576010,0,1123.17,1120.66,1111.60,1094.22,1103.68
5146,2017-11-09,1125.96,1129.62,1115.77,1129.13,3729978,0,1132.88,1123.17,1120.66,1111.60,1094.22


## Dividing the DataFrame into `train` and `test` DataFrames (80% and 20% resp.)

In [58]:
df_train = df[0:4119]
df_test  = df[4119:5149]

In [59]:
X_train = df_train[['prev_1','prev_2','prev_3','prev_4','prev_5']]
y_train = df_train['Close']
X_test = df_test[['prev_1','prev_2','prev_3','prev_4','prev_5']]
y_test = df_test['Close']

## Fitting the `LinearRegression()` model with the `df_train` data

In [60]:
reg = LinearRegression()

In [61]:
reg.fit(X_train, y_train)

0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False


In [62]:
from sklearn.metrics import mean_absolute_error

## Obtaining predictions, score and mean absolute error

In [63]:
y_predict = reg.predict(X_test)

In [64]:
print(y_predict)

[ 310.66218515  310.80604228  306.57657362 ... 1123.72967521 1132.84685254
 1128.95480558]


In [65]:
mean_absolute_error(y_test, y_predict)

6.658371171845339

In [66]:
reg.score(X_test, y_test)

0.9980930641602391

In [67]:
df_test['Prediction'] = y_predict
df_test['Prev_Close'] = df_test['Close'].shift(1)
df_test.dropna(inplace=True)
print(df_test)

            Date     Open     High      Low    Close   Volume  OpenInt  \
4120  2013-10-15   309.92   310.79   305.26   306.40  2261100        0   
4121  2013-10-16   308.38   310.80   305.55   310.49  2178262        0   
4122  2013-10-17   307.00   311.00   305.24   310.77  2645160        0   
4123  2013-10-18   319.39   331.89   316.75   328.93  5969400        0   
4124  2013-10-21   329.89   330.00   323.80   326.44  2525777        0   
...          ...      ...      ...      ...      ...      ...      ...   
5143  2017-11-06  1109.15  1125.41  1108.77  1120.66  3331738        0   
5144  2017-11-07  1124.74  1130.60  1117.50  1123.17  2684443        0   
5145  2017-11-08  1122.82  1135.54  1119.11  1132.88  2576010        0   
5146  2017-11-09  1125.96  1129.62  1115.77  1129.13  3729978        0   
5147  2017-11-10  1126.10  1131.75  1124.06  1125.35  2179181        0   

       prev_1   prev_2   prev_3   prev_4   prev_5   Prediction  Prev_Close  
4120   310.70   310.89   305.17   

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test['Prediction'] = y_predict
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test['Prev_Close'] = df_test['Close'].shift(1)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test.dropna(inplace=True)


## Backtesting 

In [68]:
from backtesting.lib import crossover
from backtesting import Backtest, Strategy

class MyStrategy(Strategy):

    def init(self):
        self.pred = self.I(lambda x: x, self.data.Prediction)
        self.actu = self.I(lambda x: x, self.data.Prev_Close)

    def next(self):
        if crossover(self.pred, self.actu):
            self.position.close()
            self.buy()
        elif crossover(self.actu, self.pred):
            self.position.close()
            self.sell(size=0.1)

bt = Backtest(df_test, MyStrategy, cash = 10_000)
stats = bt.run()
print(stats)

  (data.index.is_numeric() and
  bt = Backtest(df_test, MyStrategy, cash = 10_000)


Backtest.run:   0%|          | 0/1027 [00:00<?, ?bar/s]

Start                                  4120.0
End                                    5147.0
Duration                               1027.0
Exposure Time [%]                    99.61089
Equity Final [$]                     22166.23
Equity Peak [$]                      22223.05
Return [%]                           121.6623
Buy & Hold Return [%]               267.28133
Return (Ann.) [%]                         0.0
Volatility (Ann.) [%]                     NaN
Sharpe Ratio                              NaN
Sortino Ratio                             NaN
Calmar Ratio                              0.0
Alpha [%]                           -35.31782
Beta                                  0.58732
Max. Drawdown [%]                   -22.82513
Avg. Drawdown [%]                     -3.0558
Max. Drawdown Duration                  261.0
Avg. Drawdown Duration                  19.36
# Trades                                478.0
Win Rate [%]                         53.76569
Best Trade [%]                    

In [51]:
df_test.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,OpenInt,prev_1,prev_2,prev_3,prev_4,prev_5,Prediction
4119,2013-10-14,309.22,311.64,307.0,310.7,1938900,0,310.89,305.17,298.23,303.23,310.03,310.662185
4120,2013-10-15,309.92,310.79,305.26,306.4,2261100,0,310.7,310.89,305.17,298.23,303.23,310.806042
4121,2013-10-16,308.38,310.8,305.55,310.49,2178262,0,306.4,310.7,310.89,305.17,298.23,306.576574
4122,2013-10-17,307.0,311.0,305.24,310.77,2645160,0,310.49,306.4,310.7,310.89,305.17,310.724084
4123,2013-10-18,319.39,331.89,316.75,328.93,5969400,0,310.77,310.49,306.4,310.7,310.89,310.628002
