In [165]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_selection import SelectFromModel
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
from sklearn.feature_selection import SelectKBest, f_regression

### 50/100/150 Feature Selection Analysis wiht Sentiment for EIHOTEL.BO

In [187]:
df = pd.read_csv('../../datasets/processed_data/combined_features/EIHOTEL.BO.csv')

In [188]:
df.columns

Index(['date', 'open', 'close', 'adj close', 'volume', 'low', 'high',
       'volume_adi', 'volume_obv', 'volume_cmf',
       ...
       'inr=x_percent_change', 'cl=f_percent_change',
       'treasury_yeild_10_years_percent_change', 'usdx-index_percent_change',
       '^nsei_percent_change', '^bsesn_percent_change', '^gspc_percent_change',
       'hsi_percent_change', 'sha_percent_change', '^sti_percent_change'],
      dtype='object', length=635)

In [189]:
def create_custom_target(df: pd.DataFrame) -> pd.DataFrame:
    """
    This function creates the custom target price, which is computed as ln(high/yesterday_close)
    """    
    # make a copy of the dataframe so as not to change the original dataframe
    data_df = df.copy()
    
    # create yesterday_close feature as
    data_df['yesterday_close'] = data_df['close'].shift(1)
    
    # create custom target price to predict, computing  ln(high/yesterday_close)
    data_df['ln_target'] = np.log(data_df['high'] / data_df['yesterday_close'])
    
    # as yesterday close would not be available for first day, 
    # we would not have custom target price for that day, which needs to be excluded 
    return(data_df.iloc[1:, ])

In [190]:
df = create_custom_target(df)

In [191]:
# Drop cols with NaN values
df = df.dropna(axis=1, how='all')
df.fillna(method='ffill', inplace=True)

In [192]:
combined_date_df = df['date']
train_date, test_date = train_test_split(combined_date_df, train_size=0.8, shuffle=False)

In [193]:
df_without_date = df.drop(columns=["date"])

In [194]:
df_without_date.dropna(inplace=True)

In [195]:
def timeseries_to_supervise(df, window_size, target):    
    X = []
    y = []
    indx = []
    no_records = len(df)
    #     
    for i in range(window_size, no_records):
        X.append(df.iloc[i-window_size:i].drop(target, axis=1).values.flatten())  # Collect past records as a sequence
        y.append(df.iloc[i][target])  # Next record as target variable
        indx.append(np.arange(i-window_size, i))

    X = pd.DataFrame(X)
    y = pd.Series(y)
    return(X, y, indx)

In [196]:
window_size = 10

In [197]:
# do train/test split the data with shuffle = False
train_data, test_data = train_test_split(df_without_date, train_size=0.8, shuffle=False)

# Identify the indices of the specified sentiment columns in the original dataset
sentiment_columns = ['agg_polarity', 'agg_compound', 'topic_polarity', 'topic_compound', 'ticker_polarity', 'ticker_compound']
column_indices = [train_data.columns.get_loc(col) for col in sentiment_columns if col in train_data.columns]

column_indices

[616, 618, 619, 621, 623, 622]

In [198]:
# convert timeseries to be used in supervise learning model
X_train, y_train, indx_train = timeseries_to_supervise(train_data, window_size, 'ln_target')  

# convert timeseries to be used in supervise learning model    
X_test, y_test, indx_test = timeseries_to_supervise(test_data, window_size, 'ln_target')  

In [199]:
# Use SelectKBest to select the top 44 features
selector = SelectKBest(score_func=f_regression, k=44)
X_train_44 = selector.fit_transform(X_train, y_train)
X_test_44 = selector.transform(X_test)

# Determine if the specified columns are among the top 44 features
selected_indices = selector.get_support(indices=True)
missing_indices = [idx for idx in column_indices if idx not in selected_indices]

# Add the specified columns if they are not among the top 44 features
for idx in missing_indices:
    extracted_column_train = X_train.iloc[:, idx].values.reshape(-1, 1)
    extracted_column_test = X_test.iloc[:, idx].values.reshape(-1, 1)
    X_train_44 = np.hstack([X_train_44, extracted_column_train])
    X_test_44 = np.hstack([X_test_44, extracted_column_test])

# If some of the specified columns were already in the top 44, 
# select additional top features to make the total count 50
remaining_indices = [i for i in range(X_train.shape[1]) if i not in selected_indices and i not in missing_indices]
num_additional_features_needed = 50 - X_train_44.shape[1]

if num_additional_features_needed > 0:
    additional_selector = SelectKBest(score_func=f_regression, k=num_additional_features_needed)
    additional_selector.fit(X_train.iloc[:, remaining_indices], y_train)
    X_train_additional = additional_selector.transform(X_train.iloc[:, remaining_indices])
    X_test_additional = additional_selector.transform(X_test.iloc[:, remaining_indices])

    X_train_44 = np.hstack([X_train_44, X_train_additional])
    X_test_44 = np.hstack([X_test_44, X_test_additional])

X_train_50 = X_train_44
X_test_50 = X_test_44

In [200]:
# Train the model on the top 50 features
model_50 = RandomForestRegressor(
    n_estimators=200,
    min_samples_split=2,
    min_samples_leaf=2,
    bootstrap=True
)
model_50.fit(X_train_50, y_train)

In [201]:
def convert_custom_target_to_actual_for_supervise(df: pd.DataFrame, window: int, y: "pd.Series[int]") -> "pd.Series[int]":
    """
    this module converts custom target - ln(high/yesterday_close) to actual high price again for timeseries converted data using rolling window of size 10
    """
    data_df = df.copy()
    
    # exclude first 10 rows of train/test data, as while us
    
    y = np.exp(y) * data_df.loc[data_df.index[window:], 'yesterday_close'].reset_index(drop=True)
    return(y)    

In [202]:
def evaluate_model(model, window, test_data, test_date, X_test, y_test):
    
    # do target prediction using the provide model
    y_pred = model.predict(X_test)

    # convert back to original value, before computing mape            
    y_test = convert_custom_target_to_actual_for_supervise(test_data, window, y_test)
    y_pred = convert_custom_target_to_actual_for_supervise(test_data, window, y_pred)

    test_dates = test_date[window:].reset_index(drop=True)
    predictions_df = pd.DataFrame({'date': test_dates, 'y_test': y_test, 'y_pred': y_pred})

    # compute regression metric - mape 
    mape = mean_absolute_percentage_error(y_test, y_pred)

    # compute rmse metric
    rmse = mean_squared_error(y_test, y_pred, squared=False)        
    return(predictions_df, mape, rmse)

In [203]:
predictions_df, mape, rmse = evaluate_model(model_50, window_size, test_data, test_date, X_test_50, y_test)

In [204]:
predictions_df, mape, rmse

(           date      y_test      y_pred
 0    2020-06-23   74.900002   74.200981
 1    2020-06-24   74.949997   77.843404
 2    2020-06-25   68.900002   72.411023
 3    2020-06-26   69.000000   69.705739
 4    2020-06-29   68.750000   68.643302
 ..          ...         ...         ...
 722  2023-05-24  205.600006  205.035468
 723  2023-05-25  215.850006  205.059525
 724  2023-05-26  207.699997  211.208415
 725  2023-05-30  207.850006  210.837331
 726  2023-05-31  210.750000  212.903940
 
 [727 rows x 3 columns],
 0.02269160865282945,
 3.8902816054724156)

In [205]:
print(f"MAPE for model with top 50 features: {mape*100:.2f}%")

MAPE for model with top 50 features: 2.27%


In [207]:
predictions_df.to_csv("rf_EIHOTEL.csv", index=False)

In [186]:
import altair as alt

alt.themes.enable('fivethirtyeight')

predictions_df_copy = predictions_df.copy(deep=True)


predictions_df_copy['date'] = pd.to_datetime(predictions_df_copy['date'])

predictions_df_copy['label'] = 'Actual'
predictions_df_copy['predicted_label'] = 'Predicted'

# Actual high price line
line1 = alt.Chart(predictions_df_copy).mark_line(strokeWidth=2).encode(
    x='date:T',
    y=alt.Y('y_test:Q', title='Price', scale=alt.Scale(zero=False)),
    color=alt.Color('label:N', legend=alt.Legend(title="Line Type")),
    tooltip=['date', 'y_test', 'y_pred']
)

# Predicted high price line
line2 = alt.Chart(predictions_df_copy).mark_line(strokeWidth=1, strokeDash=[3, 3]).encode(
    x='date:T',
    y=alt.Y('y_pred:Q', title='', scale=alt.Scale(zero=False)),
    color=alt.Color('predicted_label:N', legend=alt.Legend(title="Line Type")),
    tooltip=['date', 'y_test', 'y_pred']
)

# Combine the two lines
chart = alt.layer(line1, line2).properties(
    title='Actual vs Predicted High Prices',
    width=650,
    height=400
).interactive()

chart

In [164]:
feature_names = df.columns.tolist()

# Get feature importances
importances = model_50.feature_importances_

# Map importances to corresponding feature names
feature_importance_mapping = dict(zip(feature_names, importances))

# Sort by importance
sorted_features = sorted(feature_importance_mapping.items(), key=lambda x: x[1], reverse=True)

# Display top 10
for feature, importance in sorted_features[:50]:
    print(f"{feature}: {importance}")


trend_sma_fast: 0.06605542408678328
trend_macd_diff: 0.06593938503708437
volatility_atr: 0.0628020349786367
trend_ema_slow: 0.0597868128242998
trend_sma_slow: 0.05457298863186489
volume_sma_em: 0.05328792665774465
volatility_kcp: 0.05322792890204576
volatility_kcli: 0.04883111058403793
volatility_ui: 0.04804630766020769
volatility_bbw: 0.04779665703816499
date: 0.0476082757515509
volume: 0.04721108541032013
trend_macd: 0.04697887920173681
open: 0.042932457616645305
close: 0.030801087174088318
trend_mass_index: 0.008616283286095748
trend_ema_fast: 0.008455453196692985
volatility_dcm: 0.00821750269450625
volume_cmf: 0.008001898851365367
trend_vortex_ind_diff: 0.007862252221430537
trend_vortex_ind_neg: 0.007709093795392803
volatility_bbp: 0.007641835414847084
volatility_kcl: 0.007518032948080934
trend_vortex_ind_pos: 0.007479313216164898
volatility_bbli: 0.007117536941963803
volatility_kchi: 0.007108193432147724
trend_macd_signal: 0.007084910190143151
volume_obv: 0.006948408025492222
high

Repeat the process for the top 100 features of EIHOTEL.BO

In [340]:
selector = SelectKBest(score_func=f_regression, k=100)

In [341]:
selector.fit(X_train, y_train)

In [342]:
X_train_100 = selector.transform(X_train)
X_test_100 = selector.transform(X_test)

In [343]:
model_100 = RandomForestRegressor()
model_100.fit(X_train_100, y_train)

In [344]:
# Predict on the training data
y_train_pred_100 = model_100.predict(X_train_100)

# Predict on the testing data
y_test_pred_100 = model_100.predict(X_test_100)

# Calculate MSE and R^2 for the training data
mse_train_100 = mean_squared_error(y_train, y_train_pred_100)
r2_train_100 = r2_score(y_train, y_train_pred_100)

# Calculate MSE and R^2 for the testing data
mse_test_100 = mean_squared_error(y_test, y_test_pred_100)
r2_test_100 = r2_score(y_test, y_test_pred_100)

mse_train_100, r2_train_100, mse_test_100, r2_test_100

(1.8266978434701696e-05,
 0.9683592111555606,
 0.00011795422453992068,
 0.7710554665649287)

In [345]:
# Calculate RMSE for the training data
rmse_train_100 = np.sqrt(mse_train_100)

# Calculate RMSE for the testing data
rmse_test_100 = np.sqrt(mse_test_100)

rmse_train_100, rmse_test_100

(0.00427398858616886, 0.010860673300487437)

In [346]:
mape_value_100 = mape(y_test, y_test_pred_100)
print(f"MAPE for model with top 100 features: {mape_value_100:.2f}%")


MAPE for model with top 100 features: 79.83%


Repeat the process for the top 150 features of EIHOTEL.BO

In [347]:
selector = SelectKBest(score_func=f_regression, k=150)

In [348]:
selector.fit(X_train, y_train)

In [349]:
X_train_150 = selector.transform(X_train)
X_test_150 = selector.transform(X_test)

In [350]:
model_150 = RandomForestRegressor()
model_150.fit(X_train_150, y_train)

In [351]:
# Predict on the training data
y_train_pred_150 = model_150.predict(X_train_150)

# Predict on the testing data
y_test_pred_150 = model_150.predict(X_test_150)

# Calculate MSE and R^2 for the training data
mse_train_150 = mean_squared_error(y_train, y_train_pred_150)
r2_train_150 = r2_score(y_train, y_train_pred_150)

# Calculate MSE and R^2 for the testing data
mse_test_150 = mean_squared_error(y_test, y_test_pred_150)
r2_test_150 = r2_score(y_test, y_test_pred_150)

mse_train_150, r2_train_150, mse_test_150, r2_test_150

(1.7031339575836034e-05,
 0.9704994987987036,
 0.00011319978225331366,
 0.7802836529677211)

In [352]:
# Calculate RMSE for the training data
rmse_train_100 = np.sqrt(mse_train_100)

# Calculate RMSE for the testing data
rmse_test_100 = np.sqrt(mse_test_100)

rmse_train_100, rmse_test_100

(0.00427398858616886, 0.010860673300487437)

In [353]:
mape_value_150 = mape(y_test, y_test_pred_150)
print(f"MAPE for model with top 150 features: {mape_value_150:.2f}%")

MAPE for model with top 150 features: 85.86%


### 50/100/150 Feature Selection Analysis wihtout Sentiment for EIHOTEL.BO

In [160]:
df = pd.read_csv('../../datasets/processed_data/combined_features/EIHOTEL.BO.csv')

In [128]:
df_without_sentiment = df.drop(columns=['agg_polarity', 'agg_compound', 'topic_polarity', 'topic_compound', 'ticker_polarity', 'ticker_compound'])

In [129]:
df_without_sentiment = create_custom_target(df_without_sentiment)

In [130]:
# Drop cols with NaN values
df_without_sentiment = df_without_sentiment.dropna(axis=1, how='all')
df_without_sentiment.fillna(method='ffill', inplace=True)

In [131]:
combined_date_df = df_without_sentiment['date']
train_date, test_date = train_test_split(combined_date_df, train_size=0.8, shuffle=False)
dev_date, test_date = train_test_split(test_date, train_size=0.5, shuffle=False)

In [132]:
df_without_date = df_without_sentiment.drop(columns=["date"])

In [133]:
df_without_date.dropna(inplace=True)

In [134]:
# do train/test split the data with shuffle = False
train_data, test_data = train_test_split(df_without_date, train_size=0.8, shuffle=False)

# convert timeseries to be used in supervise learning model
X_train, y_train, indx_train = timeseries_to_supervise(train_data, window_size, 'ln_target')  

# convert timeseries to be used in supervise learning model    
X_test, y_test, indx_test = timeseries_to_supervise(test_data, window_size, 'ln_target')  

In [135]:
selector = SelectKBest(score_func=f_regression, k=50)
X_train_50 = selector.fit_transform(X_train, y_train)
X_test_50 = selector.transform(X_test)

In [136]:
# Train the model on the top 50 features
model_50 = RandomForestRegressor(
    n_estimators=200,
    min_samples_split=2,
    min_samples_leaf=2,
    bootstrap=True
)
model_50.fit(X_train_50, y_train)

In [137]:
predictions_df, mape, rmse = evaluate_model(model_50, window_size, test_data, test_date, X_test_50, y_test)

In [103]:
predictions_df, mape, rmse

(           date      y_test      y_pred
 0    2020-06-23   74.900002   73.954376
 1    2020-06-24   74.949997   77.638945
 2    2020-06-25   68.900002   72.331000
 3    2020-06-26   69.000000   69.921408
 4    2020-06-29   68.750000   68.836272
 ..          ...         ...         ...
 353  2021-11-23  138.600006  141.517175
 354  2021-11-24  139.600006  141.363915
 355  2021-11-25  138.699997  142.206651
 356  2021-11-26  135.699997  141.986574
 357  2021-11-29  128.300003  131.792770
 
 [358 rows x 3 columns],
 0.020174440426755404,
 2.712012490832198)

In [None]:
predictions_df.to_csv("rf_EIHOTEL_without_senitment.csv", index=False)

In [104]:
print(f"MAPE for model with top 50 features without sentiment features: {mape*100:.2f}%")

MAPE for model with top 50 features without sentiment features: 2.02%


In [105]:
import altair as alt

alt.themes.enable('fivethirtyeight')

predictions_df['date'] = pd.to_datetime(predictions_df['date'])

predictions_df['label'] = 'Actual'
predictions_df['predicted_label'] = 'Predicted'

# Actual high price line
line1 = alt.Chart(predictions_df).mark_line(strokeWidth=2).encode(
    x='date:T',
    y=alt.Y('y_test:Q', title='Price', scale=alt.Scale(zero=False)),
    color=alt.Color('label:N', legend=alt.Legend(title="Line Type")),
    tooltip=['date', 'y_test', 'y_pred']
)

# Predicted high price line
line2 = alt.Chart(predictions_df).mark_line(strokeWidth=1, strokeDash=[3, 3]).encode(
    x='date:T',
    y=alt.Y('y_pred:Q', title='', scale=alt.Scale(zero=False)),
    color=alt.Color('predicted_label:N', legend=alt.Legend(title="Line Type")),
    tooltip=['date', 'y_test', 'y_pred']
)

# Combine the two lines
chart = alt.layer(line1, line2).properties(
    title='Actual vs Predicted High Prices',
    width=650,
    height=400
).interactive()

chart

In [None]:
feature_names = df.columns.tolist()

# Get feature importances
importances = model_50.feature_importances_

# Map importances to corresponding feature names
feature_importance_mapping = dict(zip(feature_names, importances))

# Sort by importance
sorted_features = sorted(feature_importance_mapping.items(), key=lambda x: x[1], reverse=True)

# Display top 10
for feature, importance in sorted_features[:10]:
    print(f"{feature}: {importance}")

Repeat the process for the top 100 features without sentiment of EIHOTEL.BO

In [365]:
selector = SelectKBest(score_func=f_regression, k=100)

In [366]:
selector.fit(X_train, y_train)

In [367]:
# Select the top 100 features without sentiment
X_train_100_without_sentiment = selector.transform(X_train)
X_test_100_without_sentiment = selector.transform(X_test)

In [368]:
# Train the model on the top 100 features without sentiment
model_100_without_sentiment = RandomForestRegressor()
model_100_without_sentiment.fit(X_train_100_without_sentiment, y_train)

In [369]:
# Predict on the training data
y_train_pred_100_without_sentiment = model_100_without_sentiment.predict(X_train_100_without_sentiment)

# Predict on the testing data
y_test_pred_100_without_sentiment = model_100_without_sentiment.predict(X_test_100_without_sentiment)

# Calculate MSE and R^2 for the training data
mse_train_100_without_sentiment = mean_squared_error(y_train, y_train_pred_100_without_sentiment)
r2_train_100_without_sentiment = r2_score(y_train, y_train_pred_100_without_sentiment)

# Calculate MSE and R^2 for the testing data
mse_test_100_without_sentiment = mean_squared_error(y_test, y_test_pred_100_without_sentiment)
r2_test_100_without_sentiment = r2_score(y_test, y_test_pred_100_without_sentiment)

mse_train_100_without_sentiment, r2_train_100_without_sentiment, mse_test_100_without_sentiment, r2_test_100_without_sentiment

(1.8066885372463017e-05,
 0.9687057984334819,
 0.00011609741181401809,
 0.7746594673954729)

In [370]:
# Calculate RMSE for the training data
rmse_train_100_without_sentiment = np.sqrt(mse_train_100_without_sentiment)

# Calculate RMSE for the testing data
rmse_test_100_without_sentiment = np.sqrt(mse_test_100_without_sentiment)

rmse_train_100_without_sentiment, rmse_test_100_without_sentiment

(0.004250515894860648, 0.010774850895210481)

In [379]:
mape_value_100_without_sentiment = mape(y_test, y_test_pred_100_without_sentiment)

print(f"MAPE for model with top 100 features without sentiment: {mape_value_100_without_sentiment:.2f}%")

MAPE for model with top 100 features without sentiment: 78.64%
78.63537079382344


Repeat the process for the top 150 features without sentiment of EIHOTEL.BO

In [372]:
selector = SelectKBest(score_func=f_regression, k=150)

In [373]:
selector.fit(X_train, y_train)

In [374]:
# Select the top 150 features without sentiment
X_train_150_without_sentiment = selector.transform(X_train)
X_test_150_without_sentiment = selector.transform(X_test)

In [375]:
# Train the model on the top 150 features without sentiment
model_150_without_sentiment = RandomForestRegressor()
model_150_without_sentiment.fit(X_train_150_without_sentiment, y_train)

In [376]:
# Predict on the training data
y_train_pred_150_without_sentiment = model_150_without_sentiment.predict(X_train_150_without_sentiment)

# Predict on the testing data
y_test_pred_150_without_sentiment = model_150_without_sentiment.predict(X_test_150_without_sentiment)

# Calculate MSE and R^2 for the training data
mse_train_150_without_sentiment = mean_squared_error(y_train, y_train_pred_150_without_sentiment)
r2_train_150_without_sentiment = r2_score(y_train, y_train_pred_150_without_sentiment)

# Calculate MSE and R^2 for the testing data
mse_test_150_without_sentiment = mean_squared_error(y_test, y_test_pred_150_without_sentiment)
r2_test_150_without_sentiment = r2_score(y_test, y_test_pred_150_without_sentiment)

mse_train_150_without_sentiment, r2_train_150_without_sentiment, mse_test_150_without_sentiment, r2_test_150_without_sentiment

(1.7643287217080387e-05,
 0.969439525680014,
 0.00011351735887073847,
 0.779667249182356)

In [377]:
# Calculate RMSE for the training data
rmse_train_150_without_sentiment = np.sqrt(mse_train_150_without_sentiment)

# Calculate RMSE for the testing data
rmse_test_150_without_sentiment = np.sqrt(mse_test_150_without_sentiment)

rmse_train_150_without_sentiment, rmse_test_150_without_sentiment

(0.0042003913171370574, 0.01065445253735444)

In [378]:
mape_value_150_without_sentiment = mape(y_test, y_test_pred_150_without_sentiment)

print(f"MAPE for model with top 150 features without sentiment: {mape_value_150_without_sentiment:.2f}%")

MAPE for model with top 150 features without sentiment: 86.23%


### An explaination of key features:
volume_vwap: Volume Weighted Average Price (VWAP) - an average price based on both volume and price. Commonly used as a benchmark.

volume_vpt: Volume Price Trend - it indicates the amount of volume, which can be attributed to the trend's direction.

date: Represents the specific date for each data entry.

adj close: Adjusted closing price accounts for events such as dividends, stock splits, and new stock offerings.

volume_adi: Accumulation/Distribution Index - relates volume to price change.

high: The highest price of the stock during a particular period.

volatility_bbh: Bollinger Bands High - a volatility indicator, representing the upper band.

volume_sma_em: Volume Simple Moving Average Exponential - the exponential moving average of volume.

volatility_dcw: Donchian Channel Width - shows the size of the Donchian Channel, indicating volatility.

volume_obv: On-Balance Volume - uses volume flow to predict price changes.

volume_em: Volume Exponential Moving - the exponential moving average of volume.

volume: The number of shares or contracts traded in a stock or an entire market during a given period.

volume_fi: Force Index - measures the pressure of either buying or selling.

open: The price at which a stock started trading during a particular period.

trend_mass_index: Mass Index - identifies reversals when the range widens or narrows.

volatility_bbm: Bollinger Bands Middle - the middle band of the Bollinger Bands.

volatility_ui: Ulcer Index - measures downside risk.

volume_cmf: Chaikin Money Flow - measures the amount of Money Flow Volume over a certain period.

volatility_kcw: Keltner Channel Width - shows the width between Keltner Channels, indicating volatility.

volatility_kcc: Keltner Channel Central - the middle line of the Keltner Channel.

volume_nvi: Negative Volume Index - focuses on periods where volume has decreased from the previous period.

volatility_bbli: Bollinger Bands Low Indicator - a volatility indicator, representing the lower band.

trend_vortex_ind_diff: Vortex Indicator Difference - shows the difference between VI+ and VI-.

volatility_bbw: Bollinger Bandwidth - shows the width between the Bollinger Bands.

trend_trix: TRIX - shows the percent rate of change of a triple exponentially smoothed moving average.

trend_ema_slow: Exponential Moving Average Slow - places a greater weight and significance on the most recent data points.

trend_vortex_ind_pos: Vortex Indicator Positive - an indicator used for spotting trend reversals.

volatility_kcl: Keltner Channel Low - the lower band of the Keltner Channel.

volatility_atr: Average True Range - measures stock volatility.

volatility_bbp: Bollinger Bands Percentage - relates the current price to the Bollinger Bands levels.

volatility_kch: Keltner Channel High - the upper band of the Keltner Channel.

volatility_kcli: Keltner Channel Low Indicator - the lower channel line of the Keltner Channel.

volatility_dcl: Donchian Channel Low - the lowest price of the last 'n' days.

volatility_kchi: Keltner Channel High Indicator - the upper channel line of the Keltner Channel.

volatility_bbhi: Bollinger Bands High Indicator - indicates if the price is above the upper Bollinger band.

trend_macd: Moving Average Convergence Divergence - shows the relationship between two moving averages of a stock’s price.

trend_sma_fast: Simple Moving Average Fast - the average stock price over a specific period, giving equal weight to every daily price.

trend_sma_slow: Simple Moving Average Slow - similar to SMA fast but considers a longer period.

trend_ema_fast: Exponential Moving Average Fast - like SMA but gives more weight to recent prices.

volatility_dcm: Donchian Channel Middle - the average of the Donchian high and low.

volatility_dch: Donchian Channel High - the highest price of the last 'n' days.

trend_macd_diff: The difference between MACD and its signal line.

trend_macd_signal: The signal line of the MACD, which is an EMA of the MACD.

low: The lowest price of the stock during a particular period.

close: The final price at which the stock is traded during a particular period.