### Load Libraries

In [14]:
from prophet import Prophet
import pandas as pd
from datetime import datetime
import plotly.express as px
import plotly.graph_objs as go
import numpy as np
import altair as alt
import time
import os

import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
from sklearn.model_selection import train_test_split

# load user defined libraries
import sys
sys.path.append('../../src/utils/')
from data_wrangler import create_all_features, fetch_topn_features, convert_custom_target_to_actual
from prophet_util import prepare_data_for_training, create_model, convert_predicitons_to_actual, combine_actual_and_predictions, prepare_data_for_predictions
from visualization import plot_prediction_range

### Define Data Paths

In [2]:
# Dataset/output locations plus the tickers and topic ids used by this notebook.
# TICKERS and TOPIC_IDS both have five entries and are indexed with the same
# position below.
data_paths = {
    'COMBINED_FEATURES': '../../datasets/processed_data/combined_features/',
    'TICKERS': ['EIHOTEL.BO', 'ELGIEQUIP.BO', 'IPCALAB.BO', 'PGHL.BO', 'TV18BRDCST.BO'],
    'FEATURE_PATH': '../../datasets/processed_data/feature_importance/LightGBM/',
    'VISUALIZATION_PATH': '../../visualizations/',
    'TOPIC_IDS': [33, 921, 495, 495, 921],
}

# Run-level settings.
seed = 42
target_price = 'ln_target'  # presumably the name of the target column -- confirm against the feature data
window_size = 10  # Number of past records to consider
train_size = 0.8  # 80% for training, 20% for testing

# Select the first ticker and the first topic id.
ticker, topic = (data_paths[key][0] for key in ('TICKERS', 'TOPIC_IDS'))

### Extract Top Features by Importance

In [4]:
# Fetch the top-N features ranked by feature importance.
# The first argument is the path stored under data_paths['FEATURE_PATH'].
# The second argument (50) is presumably the number of features to keep (the
# chart further down is titled "Top 50 Features") -- confirm against
# fetch_topn_features.
# Per the cell output, the result includes a `feature` column, one
# `shap_value_<ticker>` column per ticker, and an `avg_shap_value` column.
topn_features_df = fetch_topn_features(data_paths['FEATURE_PATH'], 50)

../../datasets/processed_data/feature_importance/LightGBM/


Unnamed: 0,shap_value_EIHOTEL.BO,feature,shap_value_ELGIEQUIP.BO,shap_value_IPCALAB.BO,shap_value_PGHL.BO,shap_value_TV18BRDCST.BO,avg_shap_value
0,2.4e-05,volatility_dch_100,-4.40445e-07,4e-06,1e-06,-6.561536e-06,2.2e-05
1,1.7e-05,trend_psar_up_indicator_20,-9.432521e-07,1e-06,1e-06,2.456542e-07,1.8e-05


### Plot Feature Importance

In [81]:
# Bar chart of the fetched features, one bar per feature.
# The averaged SHAP values are tiny (order 1e-5 in the sample output), so they
# are multiplied by 10,000 before plotting. The y-axis title states that
# scaling so the tick values are not mistaken for raw SHAP values.
# The chain is wrapped in parentheses so each step can sit on its own line; as
# the last expression of the cell the chart is still displayed.
(
    alt.Chart(topn_features_df)
    .mark_bar(width=15)
    # Derived field used for the bar heights: avg_shap_value scaled by 1e4.
    .transform_calculate(transform_shap='datum.avg_shap_value * 10000')
    .encode(
        # sort='-y' orders the features by descending y value.
        x=alt.X('feature:N',
                sort='-y',
                axis=alt.Axis(title='Top 50 Features by Importance', labelAngle=-60, tickSize=0)),
        # A tick value v corresponds to a raw SHAP value of v * 1e-4.
        y=alt.Y('transform_shap:Q',
                axis=alt.Axis(title='Average Tree SHAP Values (x 1e-4)', tickSize=0)),
        # Colour each feature with the reversed 'blues' scheme; the legend is
        # suppressed.
        color=alt.Color('feature:N',
                        legend=None,
                        scale=alt.Scale(scheme='blues', reverse=True),
                        sort='-y'),
    )
    .properties(
        width=850,
        height=400,
        title={"text": 'Feature Importance Rating for LightGBM',
               "fontSize": 25,
               "anchor": "start"},
    )
    .configure_view(strokeWidth=0)
    .configure_axis(labelFontSize=11, titleFontSize=20, grid=False, domain=False)
    # Kept from the original; the colour legend is disabled above (legend=None).
    .configure_legend(labelLimit=0)
)