### Implementation of Elastic Net with Indicators

### Importing the Libraries

In [50]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from indicators import Indicators

### Data Collection and Pre-Processing

In [51]:
df = pd.read_parquet('KBWY_2020-04-07_2022-04-06.parquet')

In [52]:
df.shape

(75601, 11)

In [53]:
df.head()

Unnamed: 0,VOLUME,VW,OPEN,CLOSE,HIGHT,LOW,t,TRANSACTIONS,a,op,DATE
0,101,15.9531,15.9531,15.9531,15.9531,15.9531,1586283720000,2,,,2020-04-07 18:22:00
1,642,15.9981,16.0,15.995,16.0,15.995,1586283840000,3,,,2020-04-07 18:24:00
2,200,16.015,16.015,16.015,16.015,16.015,1586283900000,1,,,2020-04-07 18:25:00
3,200,16.0393,16.04,16.04,16.04,16.04,1586284020000,2,,,2020-04-07 18:27:00
4,130,16.0582,16.0606,16.0606,16.0606,16.0606,1586284080000,3,,,2020-04-07 18:28:00


### Cleaning Data

In [54]:
df.drop('DATE', axis=1, inplace = True)

In [55]:
df2 = df.fillna(0)

### Implementing Indicators 

In [56]:
# Process data normalization
norm_data = True

In [57]:
# Select Indicator transformer
ind = Indicators(ticker = '', norm_data = True, calc_all = False, list_ind = ["VWAP","MFI"])

In [58]:
# Calculate Indicators over input dataframe
df2 = ind.fit_transform(df2)

In [59]:
df2.head()

Unnamed: 0,VOLUME,VW,OPEN,CLOSE,HIGHT,LOW,t,TRANSACTIONS,a,op,MFI_w14,VWAP_w14
0,101,15.9531,15.9531,15.9531,15.9531,15.9531,1586283720000,2,0.0,0.0,,
1,642,15.9981,16.0,15.995,16.0,15.995,1586283840000,3,0.0,0.0,,
2,200,16.015,16.015,16.015,16.015,16.015,1586283900000,1,0.0,0.0,,
3,200,16.0393,16.04,16.04,16.04,16.04,1586284020000,2,0.0,0.0,,
4,130,16.0582,16.0606,16.0606,16.0606,16.0606,1586284080000,3,0.0,0.0,,


In [60]:
df2.shape

(75601, 12)

In [61]:
df2.isnull().sum().sum()

31

In [62]:
df2 = df2.fillna(0)

In [63]:
df2.tail()

Unnamed: 0,VOLUME,VW,OPEN,CLOSE,HIGHT,LOW,t,TRANSACTIONS,a,op,MFI_w14,VWAP_w14
75596,243,25.11,25.11,25.11,25.11,25.11,1649274780000,4,25.0225,24.95,71.831921,25.096402
75597,614,25.0935,25.1,25.09,25.1,25.09,1649274840000,8,25.0229,24.95,54.571254,25.099799
75598,206,25.1,25.1,25.1,25.1,25.1,1649275020000,3,25.023,24.95,54.63121,25.100296
75599,202,25.11,25.11,25.11,25.11,25.11,1649275080000,3,25.0232,24.95,56.514163,25.112274
75600,1025,25.0981,25.1,25.1,25.1,25.095,1649275140000,12,25.0239,24.95,47.532446,25.110645


### Splitting Data 

In [64]:
x = df2.iloc[:, [col for col in range(len(df2.columns)) if col != 3]]
y = df2.iloc[:, 3]

In [65]:
print(x)
print(y)

       VOLUME       VW     OPEN    HIGHT      LOW              t  \
0         101  15.9531  15.9531  15.9531  15.9531  1586283720000   
1         642  15.9981  16.0000  16.0000  15.9950  1586283840000   
2         200  16.0150  16.0150  16.0150  16.0150  1586283900000   
3         200  16.0393  16.0400  16.0400  16.0400  1586284020000   
4         130  16.0582  16.0606  16.0606  16.0606  1586284080000   
...       ...      ...      ...      ...      ...            ...   
75596     243  25.1100  25.1100  25.1100  25.1100  1649274780000   
75597     614  25.0935  25.1000  25.1000  25.0900  1649274840000   
75598     206  25.1000  25.1000  25.1000  25.1000  1649275020000   
75599     202  25.1100  25.1100  25.1100  25.1100  1649275080000   
75600    1025  25.0981  25.1000  25.1000  25.0950  1649275140000   

       TRANSACTIONS        a     op    MFI_w14   VWAP_w14  
0                 2   0.0000   0.00   0.000000   0.000000  
1                 3   0.0000   0.00   0.000000   0.000000  
2  

In [66]:
x_train, x_test, y_train, y_test = train_test_split(x,y, random_state=42)

### Training ElasticNet Model

In [67]:
enet = ElasticNet(alpha=0.1, l1_ratio=0.5)

In [68]:
enet.fit(x_train, y_train)

  model = cd_fast.enet_coordinate_descent(


ElasticNet(alpha=0.1)

### Model Evaluation

In [69]:
y_pred = enet.predict(x_test)

In [70]:
y_pred

array([22.24007037, 23.54117456, 24.01331404, ..., 25.73215362,
       18.47985285, 19.13316548])

In [71]:
mean_squared_error(y_test, y_pred)

0.0034470977384089526

In [72]:
mean_absolute_error(y_test,y_pred)

0.04099734691021624

In [73]:
root_mean_squared_error = np.sqrt(mean_squared_error(y_test, y_pred))
root_mean_squared_error

0.058711989733008986

### Evaluation for best estimation hyperparameters

In [74]:
el_net_grid = {
    'alpha': [0.1, 0.3, 0.5, 0.7, 0.9, 1.0],
    'l1_ratio': [0.1, 0.3, 0.5, 0.7, 0.9, 1.0],
}

In [75]:
elastic_cv = GridSearchCV(enet, el_net_grid, cv=3, scoring= 'neg_mean_squared_error',n_jobs=-1)

In [76]:
elastic_cv.fit(x_train, y_train)

  model = cd_fast.enet_coordinate_descent(


GridSearchCV(cv=3, estimator=ElasticNet(alpha=0.1), n_jobs=-1,
             param_grid={'alpha': [0.1, 0.3, 0.5, 0.7, 0.9, 1.0],
                         'l1_ratio': [0.1, 0.3, 0.5, 0.7, 0.9, 1.0]},
             scoring='neg_mean_squared_error')

In [77]:
y_pred2 = elastic_cv.predict(x_test)

In [78]:
y_pred2

array([22.26254227, 23.55752846, 23.99146065, ..., 25.73155614,
       18.45365374, 19.10610455])

In [79]:
mean_squared_error(y_test, y_pred2)

0.0015490203538015492

In [80]:
mean_absolute_error(y_test,y_pred2)

0.021131218458073205

In [81]:
root_mean_squared_error = np.sqrt(mean_squared_error(y_test, y_pred2))
root_mean_squared_error

0.03935759588442299

In [82]:
elastic_cv.best_estimator_

ElasticNet(alpha=0.1, l1_ratio=0.1)

### We can derive that the model performs well for alpha=0.1 and l1_ratio=0.1