## Importing `amzn` data and filling missing values (if any)

In [2]:
import pandas as pd
import ta

In [3]:
df = pd.read_csv('./Stocks/amzn.us.txt')
df.ffill().bfill()

Unnamed: 0,Date,Open,High,Low,Close,Volume,OpenInt
0,1997-05-16,1.97,1.98,1.71,1.73,14700000,0
1,1997-05-19,1.76,1.77,1.62,1.71,6106800,0
2,1997-05-20,1.73,1.75,1.64,1.64,5467200,0
3,1997-05-21,1.64,1.65,1.38,1.43,18853200,0
4,1997-05-22,1.44,1.45,1.31,1.40,11776800,0
...,...,...,...,...,...,...,...
5148,2017-11-06,1109.15,1125.41,1108.77,1120.66,3331738,0
5149,2017-11-07,1124.74,1130.60,1117.50,1123.17,2684443,0
5150,2017-11-08,1122.82,1135.54,1119.11,1132.88,2576010,0
5151,2017-11-09,1125.96,1129.62,1115.77,1129.13,3729978,0


## Adding the below features for the Random Forests model to get trained

In [4]:
df['7-day Moving Average'] = df['Close'].rolling(7).mean()
df['30-day Moving Average'] = df['Close'].rolling(30).mean()
df['RSI'] = ta.momentum.RSIIndicator(close=df['Close'], window=14).rsi()

df.dropna(inplace=True)
df.reset_index(drop=True, inplace=True)

In [6]:
for x in range(1,6):
    df[f"prev_{x}"] = df['Close'].shift(x)

df.dropna(inplace=True)
df.reset_index(drop=True, inplace=True)

df['STD_7'] = df['Close'].rolling(7).std().astype('float32')
df['STD_30'] = df['Close'].rolling(30).std().astype('float32')
df['Vol_Change'] = df['Volume'].pct_change().astype('float32')
df['Vol_MA_7'] = df['Volume'].rolling(7).mean().astype('float32')
df['MACD'] = ta.trend.MACD(df['Close']).macd().astype('float32')

df.dropna(inplace=True)
df.reset_index(drop=True, inplace=True)

In [17]:
df.replace([float('inf'), float('-inf')], pd.NA, inplace=True)
df.dropna(inplace=True)
df

Unnamed: 0,Date,Open,High,Low,Close,Volume,OpenInt,7-day Moving Average,30-day Moving Average,RSI,prev_1,prev_2,prev_3,prev_4,prev_5,STD_7,STD_30,Vol_Change,Vol_MA_7,MACD
0,1997-08-15,2.14,2.16,1.94,2.11,1597200,0,2.210000,2.272333,46.173737,2.16,2.20,2.20,2.33,2.29,0.075719,0.122522,0.53341,1326514.250,-0.003286
1,1997-08-18,2.05,2.05,1.97,2.04,1784400,0,2.190000,2.273667,43.048135,2.11,2.16,2.20,2.20,2.33,0.099666,0.119640,0.117205,1290857.125,-0.018017
2,1997-08-19,2.09,2.21,2.05,2.17,1003200,0,2.172857,2.269333,49.839134,2.04,2.11,2.16,2.20,2.20,0.089389,0.121000,-0.437794,1118228.625,-0.018983
3,1997-08-20,2.19,2.19,2.07,2.17,999600,0,2.150000,2.264333,49.839134,2.17,2.04,2.11,2.16,2.20,0.057155,0.121929,-0.003589,1083942.875,-0.019523
4,1997-08-21,2.14,2.17,2.07,2.11,624000,0,2.137143,2.249333,46.848918,2.17,2.17,2.04,2.11,2.16,0.054072,0.111539,-0.37575,1091485.750,-0.024511
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5085,2017-11-06,1109.15,1125.41,1108.77,1120.66,3331738,0,1106.748571,1008.438333,75.958310,1111.60,1094.22,1103.68,1105.28,1110.85,8.524937,57.809269,-0.111887,5880890.500,35.634190
5086,2017-11-07,1124.74,1130.60,1117.50,1123.17,2684443,0,1109.922857,1014.590667,76.304545,1120.66,1111.60,1094.22,1103.68,1105.28,10.012946,59.903904,-0.194281,3899725.500,37.340736
5087,2017-11-08,1122.82,1135.54,1119.11,1132.88,2576010,0,1113.070000,1020.657667,77.645752,1123.17,1120.66,1111.60,1094.22,1103.68,13.281528,62.392982,-0.040393,3323003.500,39.026825
5088,2017-11-09,1125.96,1129.62,1115.77,1129.13,3729978,0,1116.477143,1026.415333,75.859923,1132.88,1123.17,1120.66,1111.60,1094.22,13.990312,64.202354,0.447967,3359148.000,39.603939


## Dividing the DataFrame into `train` and `test` DataFrames (80% and 20% resp.)

In [18]:
df_train = df[0:4073]
df_test = df[4073:5090]

In [19]:
X_train = df_train[['7-day Moving Average','30-day Moving Average','RSI','prev_1','prev_2','prev_3','prev_4','prev_5','Volume','STD_7','STD_30','Vol_Change','Vol_MA_7','MACD']].astype('float32')
y_train = df_train['Close']
X_test = df_test[['7-day Moving Average','30-day Moving Average','RSI','prev_1','prev_2','prev_3','prev_4','prev_5','Volume','STD_7','STD_30','Vol_Change','Vol_MA_7','MACD']].astype('float32')
y_test = df_test['Close']

## Fitting the model with training data 

In [20]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

In [45]:
rf = RandomForestRegressor(random_state=42)


In [46]:
rf.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'squared_error'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,1.0
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


## Obtaining `score` and `mean_absolute_error`

In [49]:
rf.score(X_test, y_test)

-1.0095632183217456

In [50]:
y_predict = rf.predict(X_test)
mean_absolute_error(y_test, y_predict)

250.82273720472426