Feature Engineering and Dataload

In [None]:
import pandas as pd

# # Load the datasets
# bbca_df = pd.read_csv('BBCA_clean.csv')
# bbni_df = pd.read_csv('BBNI_clean.csv')
# bbri_df = pd.read_csv('BBRI_clean.csv')
# bmri_df = pd.read_csv('BMRI_clean.csv')

# # Feature engineering functions
# def feature_engineering(df):
#     df['H-L'] = df['High'] - df['Low']
#     df['O-C'] = df['Close'] - df['Open']
#     df['7 DAYS MA'] = df['Close'].rolling(window=7).mean()
#     df['14 DAYS MA'] = df['Close'].rolling(window=14).mean()
#     df['21 DAYS MA'] = df['Close'].rolling(window=21).mean()
#     df['7 DAYS STD DEV'] = df['Close'].rolling(window=7).std()
#     return df

# # Apply feature engineering to each dataframe
# bbca_df = feature_engineering(bbca_df)
# bbni_df = feature_engineering(bbni_df)
# bbri_df = feature_engineering(bbri_df)
# bmri_df = feature_engineering(bmri_df)

# # Save the modified datasets
# bbca_df.to_csv('BBCA_clean_modified.csv', index=False)
# bbni_df.to_csv('BBNI_clean_modified.csv', index=False)
# bbri_df.to_csv('BBRI_clean_modified.csv', index=False)
# bmri_df.to_csv('BMRI_clean_modified.csv', index=False)

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import plotly.graph_objects as go
import pandas as pd
from joblib import dump
from tensorflow.keras.models import load_model
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

In [None]:
bbca = pd.read_csv('BBCA_clean_modified.csv')
bbni = pd.read_csv('BBNI_clean_modified.csv')
bbri = pd.read_csv('BBRI_clean_modified.csv')
bmri = pd.read_csv('BMRI_clean_modified.csv')

In [None]:
bbca.columns

Index(['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'H-L',
       'O-C', '7 DAYS MA', '14 DAYS MA', '21 DAYS MA', '7 DAYS STD DEV'],
      dtype='object')

In [None]:
bbca.head(21)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,H-L,O-C,7 DAYS MA,14 DAYS MA,21 DAYS MA,7 DAYS STD DEV
0,2019-01-02,3700.0,3700.0,3625.0,3662.5,2965.774658,45319200,75.0,-37.5,,,,
1,2019-01-03,3637.5,3675.0,3550.0,3587.5,2905.042236,74174000,125.0,-50.0,,,,
2,2019-01-04,3587.5,3725.0,3575.0,3725.0,3016.38501,78514000,150.0,137.5,,,,
3,2019-01-07,3750.0,3787.5,3712.5,3712.5,3006.263184,53624800,75.0,-37.5,,,,
4,2019-01-08,3712.5,3737.5,3675.0,3675.0,2975.896729,80073200,62.5,-37.5,,,,
5,2019-01-09,3750.0,3787.5,3700.0,3787.5,3066.995605,104903400,87.5,37.5,,,,
6,2019-01-10,3812.5,3850.0,3775.0,3850.0,3117.606201,130603200,75.0,37.5,3714.285714,,,85.82582
7,2019-01-11,3875.0,3900.0,3837.5,3850.0,3117.606201,88899400,62.5,-25.0,3741.071429,,,95.664966
8,2019-01-14,3875.0,3875.0,3825.0,3850.0,3117.606201,71320000,50.0,-25.0,3778.571429,,,74.552235
9,2019-01-15,3875.0,3900.0,3825.0,3900.0,3158.094727,86697200,75.0,25.0,3803.571429,,,82.510822


Perlu Start dari hari ke 21 karena limitasi dari feature engineering

In [None]:
features=['Open', 'High', 'Low', 'Volume', 'H-L', 'O-C', '7 DAYS MA', '14 DAYS MA', '21 DAYS MA', '7 DAYS STD DEV']
len(features)

10

In [None]:
def preprocess_data(df):
    df = df.dropna().reset_index(drop=True)
    features = df[['Open', 'High', 'Low', 'Volume', 'H-L', 'O-C', '7 DAYS MA', '14 DAYS MA', '21 DAYS MA', '7 DAYS STD DEV']]
    target = df['Close']
    scaler = StandardScaler()
    features = scaler.fit_transform(features)
    return train_test_split(features, target, test_size=0.2, random_state=42), df['Date']

In [None]:
def visualize_model_performance(dates, y_train, y_test, y_pred, title):
    # Prepare the date ranges
    train_dates = dates[:len(y_train)] if y_train is not None else []
    test_dates = dates[len(train_dates):len(train_dates) + len(y_test)]

    # Create the figure
    fig = go.Figure()

    # Add traces for actual values and predictions
    fig.add_trace(go.Scatter(x=train_dates, y=y_train, mode='markers', name='Train Actual', marker=dict(color='gray')))
    fig.add_trace(go.Scatter(x=test_dates, y=y_test, mode='markers', name='Validation Actual', marker=dict(color='red')))
    fig.add_trace(go.Scatter(x=dates, y=y_pred, mode='lines', name='Predictions', line=dict(color='orange')))

    # Update layout
    fig.update_layout(
        title=title,
        xaxis=dict(
            rangeslider=dict(
                visible=True
            ),
            type='date'
        ),
        yaxis=dict(title='Close Price IDR')
    )

    # Show the plot
    fig.show()

In [None]:
def evaluate_model(model, X_train, X_test, y_train, y_test):
    model.fit(X_train, y_train)
    train_predictions = model.predict(X_train)
    test_predictions = model.predict(X_test)

    train_rmse = np.sqrt(mean_squared_error(y_train, train_predictions))
    test_rmse = np.sqrt(mean_squared_error(y_test, test_predictions))

    train_mae = mean_absolute_error(y_train, train_predictions)
    test_mae = mean_absolute_error(y_test, test_predictions)

    all_predictions = np.concatenate([train_predictions, test_predictions])

    return train_rmse, test_rmse, train_mae, test_mae, all_predictions

In [None]:
import os

def process_dataset(df, dataset_name):
    (X_train, X_test, y_train, y_test), dates = preprocess_data(df)

    # Determine number of components for PCA to retain 95% of variance
    pca = PCA(n_components=0.95)
    X_train_pca = pca.fit_transform(X_train)
    X_test_pca = pca.transform(X_test)

    # Train and evaluate Random Forest Regressor for PCA
    rf_model_pca = RandomForestRegressor(random_state=42)
    rf_train_rmse_pca, rf_test_rmse_pca, rf_train_mae_pca, rf_test_mae_pca, rf_predictions_pca = evaluate_model(rf_model_pca, X_train_pca, X_test_pca, y_train, y_test)

    # Train and evaluate Linear Regression for PCA
    lr_model_pca = LinearRegression()
    lr_train_rmse_pca, lr_test_rmse_pca, lr_train_mae_pca, lr_test_mae_pca, lr_predictions_pca = evaluate_model(lr_model_pca, X_train_pca, X_test_pca, y_train, y_test)

    # Save the models to disk
    model_directory = "saved_models"
    os.makedirs(model_directory, exist_ok=True)
    dump(rf_model_pca, os.path.join(model_directory, f'rf_model_pca_{dataset_name}.joblib'))
    dump(lr_model_pca, os.path.join(model_directory, f'lr_model_pca_{dataset_name}.joblib'))

    # Concatenate train and test sets for actual values
    y_true = np.concatenate([y_train, y_test])

    # Visualize model performance for PCA
    visualize_model_performance(dates, y_train, y_test, rf_predictions_pca, f'Random Forest Model with PCA for {dataset_name}')
    visualize_model_performance(dates, y_train, y_test, lr_predictions_pca, f'Linear Regression Model with PCA for {dataset_name}')

    return {
        'Dataset': dataset_name,
        'RF_Train_RMSE_PCA': rf_train_rmse_pca,
        'RF_Test_RMSE_PCA': rf_test_rmse_pca,
        'RF_Train_MAE_PCA': rf_train_mae_pca,
        'RF_Test_MAE_PCA': rf_test_mae_pca,
        'LR_Train_RMSE_PCA': lr_train_rmse_pca,
        'LR_Test_RMSE_PCA': lr_test_rmse_pca,
        'LR_Train_MAE_PCA': lr_train_mae_pca,
        'LR_Test_MAE_PCA': lr_test_mae_pca,
    }


In [None]:
# Result all dataset
results = []

Results Both LR and RF

BBCA

In [None]:
BBCARES=process_dataset(bbca, 'BBCA')

In [None]:
results.append(BBCARES)
pd.DataFrame(BBCARES, index=[0])

Unnamed: 0,Dataset,RF_Train_RMSE_PCA,RF_Test_RMSE_PCA,RF_Train_MAE_PCA,RF_Test_MAE_PCA,LR_Train_RMSE_PCA,LR_Test_RMSE_PCA,LR_Train_MAE_PCA,LR_Test_MAE_PCA
0,BBCA,24.368147,69.989954,17.075382,48.567683,61.825945,65.438642,45.863238,48.803874


BBNI

In [None]:
BBNIRES=process_dataset(bbni, 'BBNI')

In [None]:
results.append(BBNIRES)
pd.DataFrame(BBNIRES, index=[0])

Unnamed: 0,Dataset,RF_Train_RMSE_PCA,RF_Test_RMSE_PCA,RF_Train_MAE_PCA,RF_Test_MAE_PCA,LR_Train_RMSE_PCA,LR_Test_RMSE_PCA,LR_Train_MAE_PCA,LR_Test_MAE_PCA
0,BBNI,29.04154,80.903199,20.263635,56.573205,66.072413,65.239914,48.328434,48.781701


BBRI

In [None]:
BBRIRES=process_dataset(bbri, 'BBRI')

In [None]:
results.append(BBRIRES)
pd.DataFrame(BBRIRES, index=[0])

Unnamed: 0,Dataset,RF_Train_RMSE_PCA,RF_Test_RMSE_PCA,RF_Train_MAE_PCA,RF_Test_MAE_PCA,LR_Train_RMSE_PCA,LR_Test_RMSE_PCA,LR_Train_MAE_PCA,LR_Test_MAE_PCA
0,BBRI,32.756793,91.824529,22.368578,62.486992,79.069157,80.152601,57.919219,60.04627


BMRI

In [None]:
BMRIRES=process_dataset(bmri, 'BMRI')

In [None]:
results.append(BMRIRES)
pd.DataFrame(BMRIRES, index=[0])

Unnamed: 0,Dataset,RF_Train_RMSE_PCA,RF_Test_RMSE_PCA,RF_Train_MAE_PCA,RF_Test_MAE_PCA,LR_Train_RMSE_PCA,LR_Test_RMSE_PCA,LR_Train_MAE_PCA,LR_Test_MAE_PCA
0,BMRI,33.284662,90.45639,23.355714,65.855488,81.848405,84.69792,59.744225,64.344935


Overall Evaluation Metrics

In [None]:
resultsall=pd.DataFrame(results)
resultsall

Unnamed: 0,Dataset,RF_Train_RMSE_PCA,RF_Test_RMSE_PCA,RF_Train_MAE_PCA,RF_Test_MAE_PCA,LR_Train_RMSE_PCA,LR_Test_RMSE_PCA,LR_Train_MAE_PCA,LR_Test_MAE_PCA
0,BBCA,24.368147,69.989954,17.075382,48.567683,61.825945,65.438642,45.863238,48.803874
1,BBNI,29.04154,80.903199,20.263635,56.573205,66.072413,65.239914,48.328434,48.781701
2,BBRI,32.756793,91.824529,22.368578,62.486992,79.069157,80.152601,57.919219,60.04627
3,BMRI,33.284662,90.45639,23.355714,65.855488,81.848405,84.69792,59.744225,64.344935


**SUPPORT VECTOR REGRESSION (SVR)**

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.decomposition import PCA
from joblib import dump
import plotly.graph_objects as go

In [None]:
def preprocess_data(df):
    df = df.dropna().reset_index(drop=True)
    X = df[features]
    y = df['Close']
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    return train_test_split(X_scaled, y, test_size=0.2, random_state=42), df['Date']

In [None]:
def visualize_model_performance(dates, y_train, y_test, y_pred, title):
    train_dates = dates[:len(y_train)] if y_train is not None else []
    test_dates = dates[len(train_dates):len(train_dates) + len(y_test)]
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=train_dates, y=y_train, mode='markers', name='Train Actual', marker=dict(color='gray')))
    fig.add_trace(go.Scatter(x=test_dates, y=y_test, mode='markers', name='Validation Actual', marker=dict(color='red')))
    fig.add_trace(go.Scatter(x=dates, y=y_pred, mode='lines', name='Predictions', line=dict(color='orange')))
    fig.update_layout(
        title=title,
        xaxis=dict(rangeslider=dict(visible=True), type='date'),
        yaxis=dict(title='Close Price IDR')
    )
    fig.show()

In [None]:
def evaluate_model(model, X_train, X_test, y_train, y_test):
    model.fit(X_train, y_train)
    train_predictions = model.predict(X_train)
    test_predictions = model.predict(X_test)
    train_rmse = np.sqrt(mean_squared_error(y_train, train_predictions))
    test_rmse = np.sqrt(mean_squared_error(y_test, test_predictions))
    train_mae = mean_absolute_error(y_train, train_predictions)
    test_mae = mean_absolute_error(y_test, test_predictions)
    all_predictions = np.concatenate([train_predictions, test_predictions])
    return train_rmse, test_rmse, train_mae, test_mae, all_predictions

In [None]:
def process_dataset(df, dataset_name):
    (X_train, X_test, y_train, y_test), dates = preprocess_data(df)

    # Apply PCA to reduce dimensionality
    pca = PCA(n_components=0.95)
    X_train_pca = pca.fit_transform(X_train)
    X_test_pca = pca.transform(X_test)

    # Extended Hyperparameter tuning for SVR with PCA
    param_grid = {
        'C': [0.1, 1, 10, 100, 1000],
        'gamma': ['scale', 'auto', 0.001, 0.01, 0.1, 1],
        'epsilon': [0.001, 0.01, 0.1, 1]
    }
    svr = SVR(kernel='rbf')
    grid_search = GridSearchCV(svr, param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
    grid_search.fit(X_train_pca, y_train)

    best_svr_model = grid_search.best_estimator_
    svr_train_rmse, svr_test_rmse, svr_train_mae, svr_test_mae, svr_predictions = evaluate_model(best_svr_model, X_train_pca, X_test_pca, y_train, y_test)

    # Save the best SVR model
    dump(best_svr_model, f'{dataset_name}_best_svr_model.joblib')

    y_true = np.concatenate([y_train, y_test])
    visualize_model_performance(dates, y_train, y_test, svr_predictions, f'SVR Model with PCA for {dataset_name}')

    return {
        'Dataset': dataset_name,
        'SVR_Train_RMSE_PCA': svr_train_rmse,
        'SVR_Test_RMSE_PCA': svr_test_rmse,
        'SVR_Train_MAE_PCA': svr_train_mae,
        'SVR_Test_MAE_PCA': svr_test_mae
    }

In [None]:
results = []

In [None]:
BBCARES = process_dataset(bbca, 'BBCA')
results.append(BBCARES)

In [None]:
BBNIRES = process_dataset(bbni, 'BBNI')
results.append(BBNIRES)

In [None]:
BBRIRES = process_dataset(bbri, 'BBRI')
results.append(BBRIRES)

In [None]:
BMRIRES = process_dataset(bmri, 'BMRI')
results.append(BMRIRES)

In [None]:
resultsall = pd.DataFrame(results)
print(resultsall)

  Dataset  SVR_Train_RMSE_PCA  SVR_Test_RMSE_PCA  SVR_Train_MAE_PCA  \
0    BBCA           66.751507          67.408745          46.474671   
1    BBNI           67.672473          64.014591          48.458865   
2    BBRI           80.523510          79.035960          57.286260   
3    BMRI           92.665320          86.087605          61.010846   

   SVR_Test_MAE_PCA  
0         49.625445  
1         48.193168  
2         58.116333  
3         60.463737  


**ARIMA**

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import mean_squared_error, mean_absolute_error
from statsmodels.tsa.arima.model import ARIMA
import plotly.graph_objects as go
from joblib import dump

In [None]:
def preprocess_data_arima(df):
    df = df.dropna().reset_index(drop=True)
    df['Date'] = pd.to_datetime(df['Date'])
    df.set_index('Date', inplace=True)

    # Feature selection and scaling
    feature_data = df[features]
    scaler = StandardScaler()
    scaled_features = scaler.fit_transform(feature_data)

    # Apply PCA
    pca = PCA(n_components=0.95)  # Retain 95% of variance
    pca_features = pca.fit_transform(scaled_features)

    return pca_features, df['Close'], df.index, pca, scaler

In [None]:
def visualize_arima_performance(dates, y_train, y_test, y_pred_train, y_pred_test, title):
    train_dates = dates[:len(y_train)] if y_train is not None else []
    test_dates = dates[len(train_dates):len(train_dates) + len(y_test)]
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=train_dates, y=y_train, mode='markers', name='Train Actual', marker=dict(color='gray')))
    fig.add_trace(go.Scatter(x=test_dates, y=y_test, mode='markers', name='Validation Actual', marker=dict(color='red')))
    fig.add_trace(go.Scatter(x=train_dates, y=y_pred_train, mode='lines', name='Train Predictions', line=dict(color='blue')))
    fig.add_trace(go.Scatter(x=test_dates, y=y_pred_test, mode='lines', name='Test Predictions', line=dict(color='orange')))
    fig.update_layout(
        title=title,
        xaxis=dict(rangeslider=dict(visible=True), type='date'),
        yaxis=dict(title='Close Price IDR')
    )
    fig.show()

In [None]:
def evaluate_arima_model(train, test, order):
    history = [x for x in train]
    predictions_train = []
    predictions_test = []

    # Training phase
    for t in range(len(train)):
        model = ARIMA(history, order=order)
        model_fit = model.fit()
        yhat = model_fit.forecast()[0]
        predictions_train.append(yhat)
        history.append(train[t])

    # Testing phase
    for t in range(len(test)):
        model = ARIMA(history, order=order)
        model_fit = model.fit()
        yhat = model_fit.forecast()[0]
        predictions_test.append(yhat)
        history.append(test[t])

    return model_fit, predictions_train, predictions_test

In [None]:
def process_arima(df, dataset_name):
    pca_features, close_prices, dates, pca, scaler = preprocess_data_arima(df)
    train_size = int(len(close_prices) * 0.8)
    train, test = close_prices[:train_size], close_prices[train_size:]
    order = (5, 1, 0)
    model_fit, predictions_train, predictions_test = evaluate_arima_model(train, test, order)

    train_rmse = np.sqrt(mean_squared_error(train, predictions_train))
    train_mae = mean_absolute_error(train, predictions_train)
    test_rmse = np.sqrt(mean_squared_error(test, predictions_test))
    test_mae = mean_absolute_error(test, predictions_test)

    all_predictions_train = np.concatenate([predictions_train, [np.nan]*len(test)])
    all_predictions_test = np.concatenate([[np.nan]*len(train), predictions_test])

    visualize_arima_performance(dates, train, test, all_predictions_train, all_predictions_test, f'ARIMA Model with PCA for {dataset_name}')

    # Save models and scalers
    dump(pca, f'{dataset_name}_pca.joblib')
    dump(scaler, f'{dataset_name}_scaler.joblib')
    dump(model_fit, f'{dataset_name}_arima.joblib')

    return {
        'Dataset': dataset_name,
        'ARIMA_Train_RMSE': train_rmse,
        'ARIMA_Train_MAE': train_mae,
        'ARIMA_Test_RMSE': test_rmse,
        'ARIMA_Test_MAE': test_mae
    }

In [None]:
results = []

In [None]:
for df, name in zip([bbca, bbni, bbri, bmri], ['BBCA', 'BBNI', 'BBRI', 'BMRI']):
    res = process_arima(df, name)
    results.append(res)

In [None]:
results_df = pd.DataFrame(results)
print(results_df)

  Dataset  ARIMA_Train_RMSE  ARIMA_Train_MAE  ARIMA_Test_RMSE  ARIMA_Test_MAE
0    BBCA         82.381766        54.003604        79.699483       59.297534
1    BBNI         85.747775        56.164725        69.840795       53.572081
2    BBRI         71.415068        52.547235        64.354122       48.059975
3    BMRI        142.593205        76.744196        90.775564       68.503597


In [None]:
model_bca = load('rf_model_pca_BBCA.joblib')
model_bni = load('rf_model_pca_BBNI.joblib')
model_bri = load('rf_model_pca_BBRI.joblib')
model_mri = load('rf_model_pca_BMRI.joblib')

# Simpan ulang model dengan versi yang sekarang digunakan
dump(model_bca, 'rf_model_pca_BBCA_revised.joblib')
dump(model_bni, 'rf_model_pca_BBNI_revised.joblib')
dump(model_bri, 'rf_model_pca_BBRI_revised.joblib')
dump(model_mri, 'rf_model_pca_BMRI_revised.joblib')

['rf_model_pca_BBCA_revised.joblib']

In [None]:
from sklearn.tree import DecisionTreeRegressor

def process_dataset(df, dataset_name):
    (X_train, X_test, y_train, y_test), dates = preprocess_data(df)

    # Apply PCA to reduce dimensionality
    pca = PCA(n_components=0.95)
    X_train_pca = pca.fit_transform(X_train)
    X_test_pca = pca.transform(X_test)

    # Hyperparameter tuning for Decision Tree Regressor
    param_grid = {
        'max_depth': [None, 10, 20, 30, 40, 50],
        'min_samples_split': [2, 5, 10, 20],
        'min_samples_leaf': [1, 2, 4, 10]
    }
    dtr = DecisionTreeRegressor(random_state=42)
    grid_search = GridSearchCV(dtr, param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
    grid_search.fit(X_train_pca, y_train)

    best_dtr_model = grid_search.best_estimator_
    dtr_train_rmse, dtr_test_rmse, dtr_train_mae, dtr_test_mae, dtr_predictions = evaluate_model(best_dtr_model, X_train_pca, X_test_pca, y_train, y_test)

    # Save the best Decision Tree model
    dump(best_dtr_model, f'{dataset_name}_best_dtr_model.joblib')

    y_true = np.concatenate([y_train, y_test])
    visualize_model_performance(dates, y_train, y_test, dtr_predictions, f'Decision Tree Model with PCA for {dataset_name}')

    return {
        'Dataset': dataset_name,
        'DTR_Train_RMSE_PCA': dtr_train_rmse,
        'DTR_Test_RMSE_PCA': dtr_test_rmse,
        'DTR_Train_MAE_PCA': dtr_train_mae,
        'DTR_Test_MAE_PCA': dtr_test_mae
    }

In [None]:
results_dtr = []

In [None]:
BBCARES = process_dataset(bbca, 'BBCA')
results_dtr.append(BBCARES)

  pid = os.fork()


In [None]:
BBNIRES = process_dataset(bbni, 'BBNI')
results_dtr.append(BBNIRES)

In [None]:
BBRIRES = process_dataset(bbri, 'BBRI')
results_dtr.append(BBRIRES)

In [None]:
BMRIRES = process_dataset(bmri, 'BMRI')
results_dtr.append(BMRIRES)

In [None]:
results_dectree = pd.DataFrame(results_dtr)

  Dataset  DTR_Train_RMSE_PCA  DTR_Test_RMSE_PCA  DTR_Train_MAE_PCA  \
0    BBCA           26.448271          92.028873          16.975025   
1    BBNI           35.532449          99.022157          23.410447   
2    BBRI           66.855329         128.179952          46.260584   
3    BMRI           48.631131         114.707221          33.166444   

   DTR_Test_MAE_PCA  
0         61.024729  
1         70.135741  
2         83.105479  
3         85.351094  


In [None]:
results_dectree

Unnamed: 0,Dataset,DTR_Train_RMSE_PCA,DTR_Test_RMSE_PCA,DTR_Train_MAE_PCA,DTR_Test_MAE_PCA
0,BBCA,26.448271,92.028873,16.975025,61.024729
1,BBNI,35.532449,99.022157,23.410447,70.135741
2,BBRI,66.855329,128.179952,46.260584,83.105479
3,BMRI,48.631131,114.707221,33.166444,85.351094
