# Importing libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import altair as alt

# Importing data

In [2]:
# Importing LightGBM results
LGBM_no_sent = pd.read_csv('../datasets/processed_data/model_predictions/LightGBM/woSentiment/TV18BRDCST.BO.csv')
LGBM_w_sent = pd.read_csv('../datasets/processed_data/model_predictions/LightGBM/wSentiment/TV18BRDCST.BO.csv')

In [3]:
# Importing Prophet results
Prophet = pd.read_csv('../datasets/processed_data/model_predictions/Prophet/TV18BRDCST.BO.csv')

In [4]:
# Importing RF results
RF_no_sent = pd.read_csv('random_forest/rf_TV18BRDCST_without_sentiment.csv')
RF_w_sent = pd.read_csv('random_forest/rf_TV18BRDCST.csv')

In [5]:
# Importing LSTM results
LSTM = pd.read_csv('../datasets/processed_data/model_predictions/LSTM/TV18BRDCST_LSTM_predictions.csv')

In [6]:
# Linear Regression results
LR_no_sent = pd.read_csv('linear_regression/lr_TV18BRDCST_without_sentiment.csv')
LR_w_sent = pd.read_csv('linear_regression/lr_TV18BRDCST.csv')

# Combining datasets

## LSTM predictions outcome

In [7]:
LSTM.shape

(794, 4)

In [8]:
LSTM.head()

Unnamed: 0,date,Actual High,Predicted high LSTM (no sentiments),Predicted high LSTM (with sentiments)
0,2022-04-28,81.0,60.953505,61.940063
1,2022-04-28,81.0,60.965583,61.940926
2,2022-04-28,81.0,60.97331,61.943616
3,2022-04-28,81.0,60.97331,61.939219
4,2022-04-28,81.0,60.97331,61.955326


## LGBM predictions outcome

In [9]:
LGBM_no_sent.head(2)

Unnamed: 0,date,high,pred_high
0,2020-10-29,29.5,30.538
1,2020-10-30,29.5,29.599


In [10]:
LGBM_no_sent.columns = ['date', 'high', 'Predicted high LGBM (no sentiments)']

In [11]:
LGBM_no_sent.shape

(392, 3)

In [12]:
LGBM_w_sent.head(2)

Unnamed: 0,date,high,pred_high
0,2020-10-29,29.5,31.042
1,2020-10-30,29.5,29.91


In [13]:
LGBM_w_sent.columns = ['date', 'high', 'Predicted high LGBM (with sentiments)']

In [14]:
LGBM_w_sent.shape

(392, 3)

## Prophet predictions outcome

In [15]:
Prophet.head(2)

Unnamed: 0,date,high,pred_high_lower,pred_high_upper,pred_high
0,2022-02-09,70.0,72.732,76.318,74.507
1,2022-02-09,70.0,72.682,76.401,74.507


In [16]:
Prophet.columns= ['date', 'high', 'pred_high_lower_Prophet', 'pred_high_upper_Prophet', 'Predicted high Prophet']

In [17]:
Prophet.shape

(402, 5)

## RF predictions outcome

In [18]:
RF_no_sent.head(2)

Unnamed: 0,date,y_test,y_pred
0,2020-10-29,29.5,30.680769
1,2020-10-30,29.5,29.751698


In [19]:
RF_no_sent.columns = ['date', 'y_test', 'Predicted high RF (no sentiments)']

In [20]:
RF_no_sent.shape

(392, 3)

In [21]:
RF_w_sent.columns = ['date', 'y_test', 'Predicted high RF (with sentiments)']

In [22]:
RF_w_sent.head(2)

Unnamed: 0,date,y_test,Predicted high RF (with sentiments)
0,2020-10-29,29.5,30.628386
1,2020-10-30,29.5,30.047123


In [23]:
RF_w_sent.shape

(794, 3)

## Linear regression predictions outcome

In [24]:
LR_no_sent.head(2)

Unnamed: 0,date,y_test,y_pred,label,predicted_label
0,2020-10-29,29.5,30.690351,Actual,Predicted
1,2020-10-30,29.5,29.786468,Actual,Predicted


In [32]:
LR_no_sent.columns = ['date', 'high', 'Predicted high LR (no sentiments)', 'label','predicted_label']

In [33]:
LR_w_sent.head(2)

Unnamed: 0,date,high,Predicted high LR (with sentiments)
0,2020-10-29,29.5,30.706082
1,2020-10-30,29.5,29.947549


In [34]:
LR_w_sent.columns = ['date', 'high', 'Predicted high LR (with sentiments)']

## Combining results into dataframe

In [35]:
df = pd.concat([RF_no_sent, RF_w_sent['Predicted high RF (with sentiments)'], 
                LGBM_no_sent['Predicted high LGBM (no sentiments)'], 
                LGBM_w_sent['Predicted high LGBM (with sentiments)'],
                LR_no_sent['Predicted high LR (no sentiments)'],
                LR_w_sent['Predicted high LR (with sentiments)']], axis=1)

In [36]:
df.head()

Unnamed: 0,date,y_test,Predicted high RF (no sentiments),Predicted high RF (with sentiments),Predicted high LGBM (no sentiments),Predicted high LGBM (with sentiments),Predicted high LR (no sentiments),Predicted high LR (with sentiments)
0,2020-10-29,29.5,30.680769,30.628386,30.538,31.042,30.690351,30.706082
1,2020-10-30,29.5,29.751698,30.047123,29.599,29.91,29.786468,29.947549
2,2020-11-02,29.9,29.341476,29.294333,29.423,29.435,29.365212,29.406982
3,2020-11-03,28.4,28.555268,28.501932,28.579,28.927,28.607735,28.691628
4,2020-11-04,28.299999,28.630272,28.487993,28.525,28.539,28.614639,28.691019


In [37]:
# Merging results with the LSTM results
result = pd.merge(df, LSTM, how="left", on=["date", "date"])

In [38]:
result = result.drop('y_test', axis=1)

In [39]:
result.head()

Unnamed: 0,date,Predicted high RF (no sentiments),Predicted high RF (with sentiments),Predicted high LGBM (no sentiments),Predicted high LGBM (with sentiments),Predicted high LR (no sentiments),Predicted high LR (with sentiments),Actual High,Predicted high LSTM (no sentiments),Predicted high LSTM (with sentiments)
0,2020-10-29,30.680769,30.628386,30.538,31.042,30.690351,30.706082,,,
1,2020-10-30,29.751698,30.047123,29.599,29.91,29.786468,29.947549,,,
2,2020-11-02,29.341476,29.294333,29.423,29.435,29.365212,29.406982,,,
3,2020-11-03,28.555268,28.501932,28.579,28.927,28.607735,28.691628,,,
4,2020-11-04,28.630272,28.487993,28.525,28.539,28.614639,28.691019,,,


In [40]:
result.columns

Index(['date', 'Predicted high RF (no sentiments)',
       'Predicted high RF (with sentiments)',
       'Predicted high LGBM (no sentiments)',
       'Predicted high LGBM (with sentiments)',
       'Predicted high LR (no sentiments)',
       'Predicted high LR (with sentiments)', 'Actual High',
       'Predicted high LSTM (no sentiments)',
       'Predicted high LSTM (with sentiments)'],
      dtype='object')

In [41]:
melted_df = pd.melt(result, id_vars =['date'],
                     value_vars =['Actual High','Predicted high LSTM (no sentiments)',
                                  'Predicted high LSTM (with sentiments)',
                                 'Predicted high RF (no sentiments)', 
                                  'Predicted high RF (with sentiments)', 
                                  'Predicted high LGBM (no sentiments)', 
                                  'Predicted high LGBM (with sentiments)',
                                 'Predicted high LR (no sentiments)',
                                 'Predicted high LR (with sentiments)'],
                    var_name ='Method', value_name ='High price')

In [42]:
melted_df.head(5)

Unnamed: 0,date,Method,High price
0,2020-10-29,Actual High,
1,2020-10-30,Actual High,
2,2020-11-02,Actual High,
3,2020-11-03,Actual High,
4,2020-11-04,Actual High,


In [43]:
melted_df.Method.unique()

array(['Actual High', 'Predicted high LSTM (no sentiments)',
       'Predicted high LSTM (with sentiments)',
       'Predicted high RF (no sentiments)',
       'Predicted high RF (with sentiments)',
       'Predicted high LGBM (no sentiments)',
       'Predicted high LGBM (with sentiments)',
       'Predicted high LR (no sentiments)',
       'Predicted high LR (with sentiments)'], dtype=object)

In [44]:
# use the 538 theme
alt.themes.enable('fivethirtyeight')
alt.data_transformers.enable('default', max_rows=None)

DataTransformerRegistry.enable('default')

In [46]:
zoom = alt.selection_interval(encodings=["x", "y"])

palette = alt.Scale(domain=['Actual High', 'Predicted high LSTM (no sentiments)',
       'Predicted high LSTM (with sentiments)',
       'Predicted high RF (no sentiments)',
       'Predicted high RF (with sentiments)',
       'Predicted high LGBM (no sentiments)',
       'Predicted high LGBM (with sentiments)',
                           'Predicted high LR (no sentiments)','Predicted high LR (with sentiments)'],
                  range=['#330000', '#FF0000', '#0000FF', '#00FF00', '#FF8000','#663300','#808080', '#00FFFF', '#FF00FF'])

nearest = alt.selection_point(nearest=True, on='mouseover', clear='mouseout',
                        fields=['date'], empty=False)


minimap = (
    alt.Chart(melted_df)
    .mark_line()
    .add_params(zoom)
    .encode(
        x="date:T",
        y="High price:Q",
        color=alt.condition(zoom, "Method", alt.value("darkgrey")),
    )
    .properties(
        width=200,
        height=200,
        title="TV18BRDCST 'high'price"))

# Transparent selectors across the chart. This is what tells us
# the x-value of the cursor
selectors = alt.Chart(melted_df).mark_point().encode(
    x='date:T',
    opacity=alt.value(0),
).add_params(
    nearest
)

columns = sorted(melted_df.Method.unique())
selection = alt.selection_single(
    fields=['date'], nearest=True, on='mouseover', empty='none', clear='mouseout'
)

detail = (
    alt.Chart(melted_df)
    .mark_line()
    .encode(
        alt.X("date:T").scale(domain={"param": zoom.name, "encoding": "x"}),
        alt.Y("High price:Q").scale(domain={"param": zoom.name, "encoding": "y"}),
        color = alt.Color('Method:N',scale=palette, legend=alt.Legend(labelLimit = 400)),
    )
    .properties(width=600, height=400, title="TV18BRDCST 'high' stock price prediction results -- detail view")
)

points = detail.mark_point().transform_filter(selection)

rule = detail.transform_pivot(
    'Method', value='High price', groupby=['date']
).mark_rule().encode(
    opacity=alt.condition(selection, alt.value(0.3), alt.value(0)),
    tooltip=[alt.Tooltip(c, type='quantitative') for c in columns]
).add_selection(selection)

(detail | minimap).configure_legend(
    orient='none',direction = 'vertical',legendX = 650,  legendY = 250, offset=-200,
    symbolDirection='vertical',
        titleFontSize=16,
        labelFontSize=14
    ).configure_title(fontSize=18)





In [47]:
source = melted_df
zoom = alt.selection_interval(encodings=["x", "y"])

minimap = (
    alt.Chart(melted_df)
    .mark_line()
    .add_params(zoom)
    .encode(
        x="date:T",
        y="High price:Q",
        color=alt.condition(zoom, "Method", alt.value("darkgrey")),
    )
    .properties(
        width=200,
        height=200,
        ))

selectors = alt.Chart(melted_df).mark_point().encode(
    x='date:T',
    opacity=alt.value(0),
).add_params(
    nearest
)

base = alt.Chart(source).encode(x='date:T')
columns = sorted(source.Method.unique())
selection = alt.selection_single(
    fields=['date'], nearest=True, on='mouseover', empty='none', clear='mouseout'
)

lines = base.mark_line().encode(
        alt.X("date:T").scale(domain={"param": zoom.name, "encoding": "x"}),
        alt.Y("High price:Q").scale(domain={"param": zoom.name, "encoding": "y"}),
        color = alt.Color('Method:N',scale=palette, legend=alt.Legend(labelLimit = 400))
).properties(width=600, height=400, title="TV18BRDCST 'high' stock price prediction results -- detail view")

points = lines.mark_point().transform_filter(selection)

rule = base.transform_pivot(
    'Method', value='High price', groupby=['date']
).mark_rule().encode(
    opacity=alt.condition(selection, alt.value(0.3), alt.value(0)),
    tooltip=[alt.Tooltip(c, type='quantitative') for c in columns]
).add_selection(selection)

(lines + points + rule | minimap).configure_legend(
    orient='none',direction = 'vertical',legendX = 650,  legendY = 250, offset=-200,
    symbolDirection='vertical',
        titleFontSize=16,
        labelFontSize=14
    ).configure_title(fontSize=18)

In [48]:
!pip show altair

Name: altair
Version: 4.2.0
Summary: Altair: A declarative statistical visualization library for Python.
Home-page: http://altair-viz.github.io
Author: Brian E. Granger / Jake VanderPlas
Author-email: jakevdp@gmail.com
License: BSD 3-clause
Location: /Users/shamil/opt/anaconda3/lib/python3.9/site-packages
Requires: entrypoints, jinja2, jsonschema, numpy, pandas, toolz
Required-by: altair-data-server, altair-saver, altair-viewer, gpdvega
