# Linear Regression Indicator

In [13]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
import plotly.graph_objects as go

In [14]:
import sys
import importlib
# Add the parent directory to the module search path so that the
# `src.finance_ml` imports below can be resolved (assumes the notebook is
# launched from a folder directly beneath the project root — TODO confirm).
sys.path.append('../')

from IPython.display import display, Math, Latex

# From data_preparation classes
from src.finance_ml.data_preparation.data_preparation import DataLoader

from src.finance_ml.indicators.indicators import Indicators

In [15]:
# Number of leading records taken from the example dataset
N = 200

# Parquet file to load, given relative to the data folder.
# NOTE(review): despite the `USDBRL` suffix in the variable name, this path
# points at an FB equities file — consider renaming across the notebook.
fname_USDBRL = "equities/FB_2020-04-07_2022-04-06.parquet"

In [16]:
# Dataset used by this simulation: a label for it and the file it is read from
ticker, fname = 'equities', fname_USDBRL

In [17]:
# Build the loader. `time_index_col` names the timestamp column (it must be the
# same column in all inputs); `keep_cols` lists the columns that are intended
# to remain in the dataset.
dataloader = DataLoader(
    time_index_col='DATE',
    keep_cols=['VOLUME', 'OPEN', 'CLOSE', 'HIGHT', 'LOW', 'TRANSACTIONS'],
)

In [18]:
# Read the dataset selected through `fname` (rather than a hard-coded file
# variable, so changing the selection above actually changes what is loaded),
# keep only the first N rows, and turn the index returned by the loader into a
# regular column. `reset_index` is used without `inplace` so the cell builds
# `df` in one expression and can be re-run safely.
df = (
    dataloader.file_read('../data/' + fname, 'parquet')
    .iloc[:N]
    .reset_index(drop=False)
)
display(df)

Unnamed: 0,DATE,VOLUME,VW,OPEN,CLOSE,HIGHT,LOW,t,TRANSACTIONS,a,op
0,2020-04-07 15:41:00,49517.0,169.7005,169.7050,169.700,169.8200,169.5811,1586274060000,450,,
1,2020-04-07 15:42:00,38624.0,169.7052,169.6700,169.740,169.8105,169.6200,1586274120000,456,,
2,2020-04-07 15:43:00,24795.0,169.6920,169.7200,169.570,169.8050,169.5700,1586274180000,348,,
3,2020-04-07 15:44:00,33196.0,169.4669,169.5500,169.410,169.5500,169.3900,1586274240000,433,,
4,2020-04-07 15:45:00,67190.0,169.2253,169.3900,169.050,169.4800,168.9700,1586274300000,650,,
...,...,...,...,...,...,...,...,...,...,...,...
195,2020-04-07 18:56:00,103549.0,167.9256,167.7200,168.170,168.1800,167.7100,1586285760000,524,,
196,2020-04-07 18:57:00,44672.0,168.2474,168.1610,168.190,168.3300,168.1610,1586285820000,426,,
197,2020-04-07 18:58:00,33642.0,168.0721,168.2027,168.065,168.2300,168.0000,1586285880000,399,,
198,2020-04-07 18:59:00,43609.0,168.0636,168.0300,168.100,168.1700,168.0000,1586285940000,402,,


In [19]:
# Columns kept for the rest of the notebook
selected_columns = ['VOLUME', 'OPEN', 'CLOSE', 'HIGHT', 'LOW', 'TRANSACTIONS', 'DATE']

# Narrow the frame to those columns; `result_df` and `df` end up referring to
# the same reduced frame.
df = result_df = df[selected_columns]

In [20]:
# Show the frame again to confirm which columns are left after the selection
display(df)

Unnamed: 0,VOLUME,OPEN,CLOSE,HIGHT,LOW,TRANSACTIONS,DATE
0,49517.0,169.7050,169.700,169.8200,169.5811,450,2020-04-07 15:41:00
1,38624.0,169.6700,169.740,169.8105,169.6200,456,2020-04-07 15:42:00
2,24795.0,169.7200,169.570,169.8050,169.5700,348,2020-04-07 15:43:00
3,33196.0,169.5500,169.410,169.5500,169.3900,433,2020-04-07 15:44:00
4,67190.0,169.3900,169.050,169.4800,168.9700,650,2020-04-07 15:45:00
...,...,...,...,...,...,...,...
195,103549.0,167.7200,168.170,168.1800,167.7100,524,2020-04-07 18:56:00
196,44672.0,168.1610,168.190,168.3300,168.1610,426,2020-04-07 18:57:00
197,33642.0,168.2027,168.065,168.2300,168.0000,399,2020-04-07 18:58:00
198,43609.0,168.0300,168.100,168.1700,168.0000,402,2020-04-07 18:59:00


In [21]:
# Number of bars (rows) in each segment; every segment gets its own
# linear regression fit further down.
num_bars_segment = 20

In [22]:
# Number of segments, rounded UP so that a trailing partial segment (when
# len(df) is not an exact multiple of num_bars_segment) is counted as well.
# With plain floor division those last bars would never receive a regression
# line, even though the plotting loop already clamps the segment end to len(df).
num_segments = (len(df) + num_bars_segment - 1) // num_bars_segment

In [23]:
# Independent variable: the DATE column converted to its numeric
# representation and shaped as a single-feature column vector (n_rows, 1)
X = pd.to_numeric(df['DATE']).to_numpy().reshape(-1, 1)

# Dependent variable: closing prices
y = df['CLOSE'].to_numpy()

# Empty figure that the traces get added to
fig = go.Figure()

In [24]:
# Start from a fresh figure: the traces below are appended to `fig`, so reusing
# a figure object left over from an earlier execution would stack duplicate
# traces every time this cell is re-run.
fig = go.Figure()

# Fit and draw one linear regression line per segment
for i in range(num_segments):
    start_index = i * num_bars_segment
    # Clamp so a shorter final segment never reads past the end of the data
    end_index = min(start_index + num_bars_segment, len(df))

    # Subset of data for the current segment
    X_segment = X[start_index:end_index]
    y_segment = y[start_index:end_index]

    # Least-squares fit of the closing price against the numeric timestamp,
    # evaluated on the same points to obtain the segment's regression line
    model = LinearRegression()
    model.fit(X_segment, y_segment)
    y_pred_segment = model.predict(X_segment)

    # Trace for this segment's regression line (explicit positional slicing
    # keeps the x values aligned with X_segment / y_segment)
    linear_regression_trace = go.Scatter(
        x=df['DATE'].iloc[start_index:end_index],
        y=y_pred_segment,
        mode='lines',
        name=f'Linear Regression Line {i + 1}',
        line=dict(color='red'),
    )
    fig.add_trace(linear_regression_trace)

# Candlestick trace built from the OHLC columns of the full frame
candlestick_trace = go.Candlestick(
    x=df['DATE'],
    open=df['OPEN'],
    high=df['HIGHT'],
    low=df['LOW'],
    close=df['CLOSE'],
    name='Candlestick',
)
fig.add_trace(candlestick_trace)

# Label the figure so it can be read on its own
fig.update_layout(
    title='Linear Regression Indicator',
    xaxis_title='DATE',
    yaxis_title='Price',
)

# Display the figure
fig.show()