## Imports

In [None]:
from datetime import datetime, timedelta
import logging
import pandas as pd
import numpy as np

from prometheus_api_client import MetricRangeDataFrame, PrometheusConnect

import sklearn
from sklearn.datasets import fetch_california_housing

from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor

from xgboost import XGBRegressor


from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.model_selection import train_test_split
from sklearn.utils import Bunch

import matplotlib.pyplot as plt

## Constants

In [None]:
# Base URL of the Prometheus server that every query is sent to.
PROM_URL = "http://localhost:9090"
# Range-vector window substituted into the second "{}" of each rate() template.
RATE_INTERVAL = "20s"


def _rate_sum_query(metric):
    """Return the ``sum(rate(<metric>{}[{}]))`` template for *metric*.

    The two ``{}`` placeholders are meant to be filled with ``str.format``:
    first the label selector, then the rate window.
    """
    return "sum(rate(" + metric + "{}[{}]))"


METRIC_BPF_CPU_TIME = "kepler_process_bpf_cpu_time_ms_total"
QUERY_BPF_CPU_TIME = _rate_sum_query(METRIC_BPF_CPU_TIME)
COLUMNS_COMMAND_PID = ['command', 'pid']

METRIC_CPU_INSTRUCTIONS = "kepler_process_cpu_instructions_total"
QUERY_CPU_INSTRUCTIONS = _rate_sum_query(METRIC_CPU_INSTRUCTIONS)

METRIC_CPU_CYCLES = "kepler_process_cpu_cycles_total"
QUERY_CPU_CYCLES = _rate_sum_query(METRIC_CPU_CYCLES)

METRIC_PKG_JOULES_TOTAL = "kepler_node_package_joules_total"
QUERY_KEPLER_PKG_JOULES_TOTAL = _rate_sum_query(METRIC_PKG_JOULES_TOTAL)

METRIC_VM_PKG_JOULES_TOTAL = "kepler_vm_package_joules_total"
QUERY_KEPLER_VM_PKG_JOULES_TOTAL = _rate_sum_query(METRIC_VM_PKG_JOULES_TOTAL)

METRIC_VM_CPU_CYCLES_TOTAL = "kepler_vm_cpu_cycles_total"
QUERY_KEPLER_VM_CPU_CYCLES_TOTAL = _rate_sum_query(METRIC_VM_CPU_CYCLES_TOTAL)

METRIC_VM_CPU_INSTRUCTIONS_TOTAL = "kepler_vm_cpu_instructions_total"
QUERY_KEPLER_VM_CPU_INSTRUCTIONS_TOTAL = _rate_sum_query(METRIC_VM_CPU_INSTRUCTIONS_TOTAL)


METRIC_NODE_RAPL_PKG_JOULES_TOTAL = "node_rapl_package_joules_total"
QUERY_NODE_RAPL_PKG_JOULES_TOTAL = _rate_sum_query(METRIC_NODE_RAPL_PKG_JOULES_TOTAL)
# Value for the "path" label of the RAPL metric (defined once; it used to be
# assigned twice with the same value).
LABEL_RAPL_PATH = "/host/sys/class/powercap/intel-rapl:0"

METRIC_NODE_CPU_SCALING_FREQUENCY_HERTZ = "node_cpu_scaling_frequency_hertz"
# No rate() here, so only the label-selector placeholder is present.
QUERY_NODE_CPU_SCALING_FREQUENCY_HERTZ = "sum(" + METRIC_NODE_CPU_SCALING_FREQUENCY_HERTZ + "{})"

# Label selectors, keyed by label name.
JOB_DEV = {"job":"dev"}
JOB_METAL = {"job":"metal"}
JOB_VM = {"job":"vm"}
JOB_NODE_EXPORTER = {"job":"node-exporter"}
COMMAND_STRESS = {"command": ".*stress.*"}

METRIC_UP = "up"

## Set log level

In [None]:
# Configure the root logger so that only ERROR and more severe records are emitted.
logging.basicConfig(level=logging.ERROR)

In [None]:
def train_lr(data):
    """Fit a linear regression predicting the ``PRICE`` column of *data*.

    All other columns are used as features. 20% of the rows are held out
    (fixed seed 42) and used to print the MSE, the R^2 score and the
    per-feature coefficients.

    Returns:
        The fitted ``LinearRegression`` model.
    """
    features = data.drop('PRICE', axis=1)
    target = data['PRICE']
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    model = LinearRegression()
    model.fit(X_train, y_train)

    # Score on the held-out rows only.
    predictions = model.predict(X_test)
    mse, r2 = mean_squared_error(y_test, predictions), r2_score(y_test, predictions)

    print(f"Mean Squared Error: {mse}")
    print(f"R^2 Score: {r2}")
    print("Coefficients:")
    for feature, coef in zip(features.columns, model.coef_):
        print(f"{feature}: {coef}")
    return model

In [None]:
def test_california_housing():
    """Train ``train_lr`` on the California housing dataset as a smoke test.

    Loads the dataset, stores its target in a ``PRICE`` column, prints the
    first rows, plots every column and returns the fitted model.
    """
    dataset: Bunch = fetch_california_housing()
    frame: pd.DataFrame = pd.DataFrame(dataset.data, columns=dataset.feature_names)
    # train_lr expects the target in a column named PRICE.
    frame['PRICE'] = dataset.target

    print(frame.head())
    frame.plot()
    plt.show()

    return train_lr(frame)

In [None]:
def fetch_prometheus_data(start_time, end_time, query, rename_value_column, columns=None, label_config: dict = None, step: str = "1s"):
    """Run a Prometheus range query and return the samples as a DataFrame.

    Args:
        start_time: Start of the query range, passed to ``custom_query_range``.
        end_time: End of the query range, passed to ``custom_query_range``.
        query: Query template completed with ``str.format``. Templates that
            contain ``"rate("`` receive the label selector and
            ``RATE_INTERVAL``; all others receive only the label selector.
        rename_value_column: Name given to the resulting ``value`` column.
        columns: Extra column names placed before ``timestamp`` and ``value``
            when the frame is built. ``None`` (the default) means none.
        label_config: Mapping of label name to pattern. Every entry becomes a
            ``name=~'pattern'`` matcher; a falsy value means no selector.
        step: Query resolution step passed to ``custom_query_range``.

    Returns:
        The samples sorted by ``timestamp`` with an ``int64`` index. If
        anything fails (including an empty result) the error and traceback
        are printed and an empty ``pd.DataFrame`` is returned.
    """
    import traceback

    try:
        # Build a fresh list per call instead of sharing a mutable default.
        extra_columns = [] if columns is None else list(columns)

        prom = PrometheusConnect(url=PROM_URL, disable_ssl=True)
        if label_config:
            matchers = [f"{key}=~'{value}'" for key, value in label_config.items()]
            labels = "{" + ",".join(matchers) + "}"
        else:
            labels = ""

        # rate() templates carry a second placeholder for the range window.
        if "rate(" in query:
            query = query.format(labels, RATE_INTERVAL)
        else:
            query = query.format(labels)

        metric_data = prom.custom_query_range(query=query, start_time=start_time, end_time=end_time, step=step)
        if not metric_data:
            raise ValueError(f"No data found for metric: {query}")

        metric_df = MetricRangeDataFrame(
            data=metric_data,
            columns=(extra_columns + ['timestamp', 'value']),
            ts_as_datetime=False,
        )
        metric_df.index = metric_df.index.astype('int64')
        metric_df = metric_df.rename(columns={'value': rename_value_column})
        return metric_df.sort_values(by='timestamp')
    except Exception as e:
        # Deliberate best-effort: report the problem and hand back an empty
        # frame so the calling notebook cell still completes.
        print(f"An error occurred: {e}")
        traceback.print_exc()
        return pd.DataFrame()

In [None]:
def printDF(df: pd.DataFrame):
    """Print a one-line summary of *df* followed by the frame itself.

    The summary uses the name stored in ``df.attrs['name']``; a frame without
    that entry is reported as ``<unnamed>`` instead of raising ``KeyError``.
    """
    df_name = df.attrs.get('name', '<unnamed>')
    print(f"{df_name} shape: {df.shape}, Columns: {df.columns}, Index={df.index}")
    print(df)

## Start and end time

In [None]:
# All durations are expressed in milliseconds (60_000 ms per minute).
duration_5_min = 5 * 60_000
duration_10_min = 10 * 60_000
duration_20_min = 20 * 60_000
duration_30_min = 30 * 60_000
duration_45_min = 45 * 60_000
duration_1_hour = 60 * 60_000
duration_2_hour = 2 * duration_1_hour

# Length of the window that is queried.
duration = duration_1_hour

# The window ends at the current local time.
end_time = datetime.now()
# Fixed alternative: end_time = datetime.fromtimestamp(1726759800)

start_time = end_time - timedelta(milliseconds=duration)
# Fixed alternatives: start_time = 1726633800
#                     start_time = datetime.fromtimestamp(1726752600)


## Query prometheus and get the following data frames
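 - `bpf_cpu_time_df`: bpf cpu time for all processes, from kepler (job `metal`)
 - `vm_bpf_cpu_time_df`: bpf cpu time for all processes, from kepler (job `vm`)
 - `rapl_pkg_joules_df`: RAPL package power, from Node Exporter
 - `kepler_node_pkg_joules_df`: kepler node package joules
 - `kepler_vm_pkg_joules_df`: kepler VM package joules
 - `cpu_inst_df`: cpu instructions for all processes, from kepler
 - `cpu_cycles_df`: cpu cycles for all processes, from kepler
 - `cpu_scaling_freq_df`: cpu scaling frequency, from Node Exporter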

In [None]:
# Summed kepler BPF CPU-time rate for job "metal" over the training window.
bpf_cpu_time_df = fetch_prometheus_data(
    query=QUERY_BPF_CPU_TIME,
    label_config=JOB_METAL,
    rename_value_column='bpf_cpu_time',
    start_time=start_time,
    end_time=end_time,
    # columns=COLUMNS_COMMAND_PID
)
bpf_cpu_time_df.attrs = dict(name="bpf_cpu_time_df")
# Uncomment to inspect: printDF(bpf_cpu_time_df)

In [None]:
# Same query as above, restricted to job "vm".
vm_bpf_cpu_time_df = fetch_prometheus_data(
    query=QUERY_BPF_CPU_TIME,
    label_config=JOB_VM,
    rename_value_column='vm_bpf_cpu_time',
    start_time=start_time,
    end_time=end_time,
    # columns=COLUMNS_COMMAND_PID
)
vm_bpf_cpu_time_df.attrs = dict(name="vm_bpf_cpu_time_df")
# Uncomment to inspect: printDF(vm_bpf_cpu_time_df)

In [None]:
# Summed RAPL package joules rate reported by job "node-exporter".
rapl_pkg_joules_df = fetch_prometheus_data(
    query=QUERY_NODE_RAPL_PKG_JOULES_TOTAL,
    label_config=JOB_NODE_EXPORTER,
    rename_value_column='rapl_pkg_joules',
    start_time=start_time,
    end_time=end_time,
)
rapl_pkg_joules_df.attrs = dict(name="rapl_pkg_joules_df")
# Uncomment to inspect: printDF(rapl_pkg_joules_df)

In [None]:
# Summed kepler node package joules rate for job "metal".
# NOTE(review): the query aggregates with sum(), so the command/pid columns
# requested here are presumably empty — confirm they are needed.
kepler_node_pkg_joules_df = fetch_prometheus_data(
    query=QUERY_KEPLER_PKG_JOULES_TOTAL,
    label_config=JOB_METAL,
    columns=COLUMNS_COMMAND_PID,
    rename_value_column='kepler_node_pkg_joules',
    start_time=start_time,
    end_time=end_time,
)
kepler_node_pkg_joules_df.attrs = dict(name="kepler_node_pkg_joules_df")
# Uncomment to inspect: printDF(kepler_node_pkg_joules_df)

In [None]:
# Summed kepler VM package joules rate, queried from job "metal".
kepler_vm_pkg_joules_df = fetch_prometheus_data(
    query=QUERY_KEPLER_VM_PKG_JOULES_TOTAL,
    label_config=JOB_METAL,
    columns=COLUMNS_COMMAND_PID,
    rename_value_column='kepler_vm_pkg_joules',
    start_time=start_time,
    end_time=end_time,
)
kepler_vm_pkg_joules_df.attrs = dict(name="kepler_vm_pkg_joules_df")
# Uncomment to inspect: printDF(kepler_vm_pkg_joules_df)

In [None]:
# Summed kepler CPU-instruction rate for job "metal".
cpu_inst_df = fetch_prometheus_data(
    query=QUERY_CPU_INSTRUCTIONS,
    label_config=JOB_METAL,
    columns=COLUMNS_COMMAND_PID,
    rename_value_column='cpu_instructions',
    start_time=start_time,
    end_time=end_time,
)
cpu_inst_df.attrs = dict(name="cpu_inst_df")
# Uncomment to inspect: printDF(cpu_inst_df)

In [None]:
# Summed kepler CPU-cycle rate for job "metal".
cpu_cycles_df = fetch_prometheus_data(
    query=QUERY_CPU_CYCLES,
    label_config=JOB_METAL,
    columns=COLUMNS_COMMAND_PID,
    rename_value_column='cpu_cycles',
    start_time=start_time,
    end_time=end_time,
)
cpu_cycles_df.attrs = dict(name="cpu_cycles_df")
# Uncomment to inspect: printDF(cpu_cycles_df)

In [None]:
# Summed CPU scaling frequency reported by job "node-exporter" (no rate()).
cpu_scaling_freq_df = fetch_prometheus_data(
    query=QUERY_NODE_CPU_SCALING_FREQUENCY_HERTZ,
    label_config=JOB_NODE_EXPORTER,
    columns=COLUMNS_COMMAND_PID,
    rename_value_column='cpu_scaling_freq',
    start_time=start_time,
    end_time=end_time,
)
cpu_scaling_freq_df.attrs = dict(name="cpu_scaling_freq_df")
# Uncomment to inspect: printDF(cpu_scaling_freq_df)

## Scale by num of CPUs

In [None]:
# Divisor for the optional per-CPU scaling below.
# NOTE(review): presumably the CPU count of the monitored host — confirm.
num_cpus = 16

# Per-CPU scaling is currently disabled; uncomment to divide each series by num_cpus.
#bpf_cpu_time_df['bpf_cpu_time'] = bpf_cpu_time_df['bpf_cpu_time'] / num_cpus
#cpu_inst_df['cpu_instructions'] = cpu_inst_df['cpu_instructions'] / num_cpus
#cpu_cycles_df['cpu_cycles'] = cpu_cycles_df['cpu_cycles'] / num_cpus

#rapl_pkg_joules_df['rapl_pkg_joules'] = rapl_pkg_joules_df['rapl_pkg_joules'] / num_cpus


## Sample values

In [None]:
# Bare attribute access (the method is not called); presumably relies on the
# notebook echoing the cell's last expression — confirm `.sample()` was not intended.
vm_bpf_cpu_time_df.sample

In [None]:
# Preview the first rows of the RAPL frame.
rapl_pkg_joules_df.head()

## Plot 'em

In [None]:
# Plot the VM BPF CPU time on the left axis ...
ax = vm_bpf_cpu_time_df['vm_bpf_cpu_time'].plot(label='vm_bpf_cpu_time')
# ... and the power series on a second y-axis sharing the same x-axis.
ax2 = ax.twinx()
#rapl_pkg_joules_df.plot(ax = ax2, color='orange', label='rapl_pkg_joules')
#kepler_pkg_joules_df.plot(ax = ax2, color='green', label='kepler_pkg_joules')
kepler_vm_pkg_joules_df['kepler_vm_pkg_joules'].plot(ax = ax2, color='green', label='kepler_vm_pkg_joules')

In [None]:
#cpu_inst_df.plot()
#cpu_cycles_df.plot()
#cpu_scaling_freq_df
#plt.show()

In [None]:
# Scatter of VM BPF CPU time (x) against kepler VM package joules (y).
#plt.scatter(x = bpf_cpu_time_df['bpf_cpu_time'], y = rapl_pkg_joules_df['rapl_pkg_joules'])
plt.scatter(x = vm_bpf_cpu_time_df['vm_bpf_cpu_time'], y = kepler_vm_pkg_joules_df['kepler_vm_pkg_joules'])

In [None]:
# Scatter of BPF CPU time (x) against RAPL package joules (y).
plt.scatter(x = bpf_cpu_time_df['bpf_cpu_time'], y = rapl_pkg_joules_df['rapl_pkg_joules'])

In [None]:
# Scatter of CPU cycles (x) against RAPL package joules (y).
plt.scatter(x = cpu_cycles_df['cpu_cycles'], y = rapl_pkg_joules_df['rapl_pkg_joules'])

# Prepare input data for training

In [None]:
#X = pd.DataFrame(bpf_cpu_time_df['bpf_cpu_time'], cpu_inst_df['cpu_instructions'])
#X = bpf_cpu_time_df.drop(['command', 'pid', 'bpf_cpu_time_ratio'], axis =1)
# Features: X is the same object as bpf_cpu_time_df (no copy is made), so the
# optional column additions below would also modify bpf_cpu_time_df.
X = bpf_cpu_time_df
#X['cpu_instructions'] = cpu_inst_df['cpu_instructions']
#X['cpu_cycles'] = cpu_cycles_df['cpu_cycles']
#X['cpu_scaling_freq'] = cpu_scaling_freq_df['cpu_scaling_freq']
# Target: a single-column DataFrame holding the kepler VM package joules.
y = pd.DataFrame(kepler_vm_pkg_joules_df['kepler_vm_pkg_joules'])

In [None]:
# Bare attribute access (not called); presumably used to echo the feature frame.
X.sample

In [None]:
# Feature column names.
X.columns

In [None]:
# (rows, columns) of the feature frame.
X.shape

In [None]:
# Bare attribute access (not called); presumably used to echo the target frame.
y.sample

# Split into training and test data

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split the same across runs on the same data.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(X_train.columns)
# Bare attribute access (not called); presumably used to echo the training features.
X_train.sample

# Train Models and test

## Linear Regression

In [None]:
# Fit an ordinary least-squares model and score it on the held-out rows.
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)
y_pred = lr_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
mape = mean_absolute_percentage_error(y_test, y_pred) * 100

print(f"Mean Squared Error: {mse}")
print(f"R^2 Score: {r2}")
print(f"MAPE: {mape}")
print("Coefficients:")
# y_train is a single-column DataFrame, so coef_ is 2-D (one row per target).
# Flatten it so every feature is paired with its own scalar coefficient, and
# take the names from the frame the model was actually fitted on.
for feature, coef in zip(X_train.columns, np.ravel(lr_model.coef_)):
    print(f"{feature}: {coef}")

In [None]:
# Actual (x) versus predicted (y) values for the linear model.
plt.scatter(x = y_test, y = y_pred)

## Polynomial Regression

In [None]:
# Degree-2 polynomial feature expansion followed by a linear regression.
poly = PolynomialFeatures(degree=2)
pr_model = make_pipeline(poly, LinearRegression()).fit(X_train, y_train)

# Score on the held-out rows.
y_pred = pr_model.predict(X_test)
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)
mape = 100 * mean_absolute_percentage_error(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R^2 Score: {r2}")
print(f"MAPE: {mape}")

# Coefficient printout is disabled for the pipeline:
#print(model)
#print("Coefficients:")
#for feature, coef in zip(X.columns, model.coef_):
#    print(f"{feature}: {coef}")


In [None]:
# Actual (x) versus predicted (y) values for the polynomial model.
plt.scatter(x = y_test, y = y_pred)

##  Logarithmic Transformation

In [None]:
# Apply a natural-log transformation to the training FEATURES (the target is left as is).
# NOTE(review): np.log yields -inf for zero inputs — confirm the features are strictly positive.
X_train_log = np.log(X_train)

# Fit a Linear Regression model on the log-transformed features
lt_model = LinearRegression()
lt_model.fit(X_train_log, y_train)

# Transform the test features the same way and predict; the target was never
# transformed, so the predictions need no inverse transformation.
X_test_log = np.log(X_test)
y_pred = lt_model.predict(X_test_log)
#y_pred_log = lt_model.predict(X_test)
#y_pred = np.exp(y_pred_log)

mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
mape = mean_absolute_percentage_error(y_test, y_pred) * 100


print(f"Mean Squared Error: {mse}")
print(f"R^2 Score: {r2}")
print(f"MAPE: {mape}")




In [None]:
# Actual (x) versus predicted (y) values for the log-feature model.
plt.scatter(x = y_test, y = y_pred)

## Decision Trees

In [None]:
# Regression tree limited to depth 5 (tune the depth as necessary).
dt_model = DecisionTreeRegressor(max_depth=5).fit(X_train, y_train)

# Score on the held-out rows.
y_pred = dt_model.predict(X_test)
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)
mape = 100 * mean_absolute_percentage_error(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R^2 Score: {r2}")
print(f"MAPE: {mape}")


In [None]:
# Actual (x) versus predicted (y) values for the decision tree.
plt.scatter(x = y_test, y = y_pred)

## Random Forests

In [None]:
# Forest of 100 trees. The seed matches the one used for the train/test split
# so repeated runs on the same data give the same model.
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
# y_train is a single-column DataFrame; flatten it to the 1-D target the
# estimator expects instead of passing a column vector.
rf_model.fit(X_train, np.ravel(y_train))

y_pred = rf_model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
mape = mean_absolute_percentage_error(y_test, y_pred) * 100


print(f"Mean Squared Error: {mse}")
print(f"R^2 Score: {r2}")
print(f"MAPE: {mape}")


In [None]:
# Actual (x) versus predicted (y) values for the random forest.
plt.scatter(x = y_test, y = y_pred)

##  Gradient Boosting (XGBoost)

In [None]:
# Gradient-boosted trees with the library's default settings.
gb_model = XGBRegressor().fit(X_train, y_train)

# Score on the held-out rows.
y_pred = gb_model.predict(X_test)
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)
mape = 100 * mean_absolute_percentage_error(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R^2 Score: {r2}")
print(f"MAPE: {mape}")


In [None]:
# Actual (x) versus predicted (y) values for XGBoost.
plt.scatter(x = y_test, y = y_pred)

## Support Vector Regression (SVR)

In [None]:
svr_model = SVR(kernel='rbf')  # Try 'poly' for polynomial kernel
# y_train is a single-column DataFrame; flatten it to the 1-D target the
# estimator expects instead of passing a column vector.
svr_model.fit(X_train, np.ravel(y_train))

y_pred = svr_model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
mape = mean_absolute_percentage_error(y_test, y_pred) * 100


print(f"Mean Squared Error: {mse}")
print(f"R^2 Score: {r2}")
print(f"MAPE: {mape}")


In [None]:
# Actual (x) versus predicted (y) values for SVR.
plt.scatter(x = y_test, y = y_pred)

# Model Validation

## Set validation start and end time

In [None]:
# The validation window ends at the current local time ...
validation_end_time = datetime.now()
#validation_end_time = datetime.fromtimestamp(1727088375)

# ... and spans the last 30 minutes (durations are in milliseconds).
validation_duration = duration_30_min

validation_start_time = validation_end_time - timedelta(milliseconds=validation_duration)

In [None]:
# Echo the validation window that the queries below will use.
print(f"start: {validation_start_time}")
print(f"end  : {validation_end_time}")

## Validation on vm bpf cpu time

### Fetch new data

In [None]:
# VM BPF CPU-time rate over the validation window. The value column is named
# 'bpf_cpu_time', the same name as the training feature column.
vm_bpf_cpu_time_df = fetch_prometheus_data(
    query=QUERY_BPF_CPU_TIME,
    label_config=JOB_VM,
    rename_value_column='bpf_cpu_time',
    start_time=validation_start_time,
    end_time=validation_end_time,
    # columns=COLUMNS_COMMAND_PID
)
vm_bpf_cpu_time_df.attrs = dict(name="vm_bpf_cpu_time_df")
# Uncomment to inspect: printDF(vm_bpf_cpu_time_df)

In [None]:
# RAPL package joules rate over the validation window, restricted to the
# series whose "path" label matches LABEL_RAPL_PATH.
rapl_pkg_joules_df = fetch_prometheus_data(
    query=QUERY_NODE_RAPL_PKG_JOULES_TOTAL,
    label_config=JOB_NODE_EXPORTER | {"path": LABEL_RAPL_PATH},
    rename_value_column='rapl_pkg_joules',
    start_time=validation_start_time,
    end_time=validation_end_time,
)
rapl_pkg_joules_df.attrs = dict(name="rapl_pkg_joules_df")
# Uncomment to inspect: printDF(rapl_pkg_joules_df)

In [None]:
# Kepler node package joules rate for job "metal" over the validation window.
kepler_pkg_joules_df = fetch_prometheus_data(
    query=QUERY_KEPLER_PKG_JOULES_TOTAL,
    label_config=JOB_METAL,
    columns=COLUMNS_COMMAND_PID,
    rename_value_column='kepler_pkg_joules',
    start_time=validation_start_time,
    end_time=validation_end_time,
)
kepler_pkg_joules_df.attrs = dict(name="kepler_pkg_joules_df")
# Uncomment to inspect: printDF(kepler_pkg_joules_df)

In [None]:
# Kepler VM package joules rate (job "metal") over the validation window.
vm_pkg_joules_df = fetch_prometheus_data(
    query=QUERY_KEPLER_VM_PKG_JOULES_TOTAL,
    label_config=JOB_METAL,
    rename_value_column='vm_pkg_joules',
    start_time=validation_start_time,
    end_time=validation_end_time,
)
vm_pkg_joules_df.attrs = dict(name="vm_pkg_joules_df")
# Uncomment to inspect: printDF(vm_pkg_joules_df)

In [None]:
# CPU-instruction rate over the validation window.
# NOTE(review): the frame is named vm_* but the query selects job "metal" —
# confirm this is intended.
vm_cpu_inst_df = fetch_prometheus_data(
    query=QUERY_CPU_INSTRUCTIONS,
    label_config=JOB_METAL,
    columns=COLUMNS_COMMAND_PID,
    rename_value_column='vm_cpu_instructions',
    start_time=validation_start_time,
    end_time=validation_end_time,
)
vm_cpu_inst_df.attrs = dict(name="vm_cpu_inst_df")
# Uncomment to inspect: printDF(vm_cpu_inst_df)

In [None]:
# CPU-cycle rate over the validation window.
# NOTE(review): the frame is named vm_* but the query selects job "metal" —
# confirm this is intended.
vm_cpu_cycles_df = fetch_prometheus_data(
    query=QUERY_CPU_CYCLES,
    label_config=JOB_METAL,
    columns=COLUMNS_COMMAND_PID,
    rename_value_column='vm_cpu_cycles',
    start_time=validation_start_time,
    end_time=validation_end_time,
)
vm_cpu_cycles_df.attrs = dict(name="vm_cpu_cycles_df")
# Uncomment to inspect: printDF(vm_cpu_cycles_df)

In [None]:
# Summed CPU scaling frequency (job "node-exporter") over the validation window.
cpu_scaling_freq_df = fetch_prometheus_data(
    query=QUERY_NODE_CPU_SCALING_FREQUENCY_HERTZ,
    label_config=JOB_NODE_EXPORTER,
    columns=COLUMNS_COMMAND_PID,
    rename_value_column='cpu_scaling_freq',
    start_time=validation_start_time,
    end_time=validation_end_time,
)
cpu_scaling_freq_df.attrs = dict(name="cpu_scaling_freq_df")
# Uncomment to inspect: printDF(cpu_scaling_freq_df)

### Scale by num of cpus

In [None]:
# Divisor for the optional per-vCPU scaling in the next cell.
# NOTE(review): presumably the vCPU count of the monitored VM — confirm.
num_vcpus = 2

In [None]:
#vm_bpf_cpu_time_df['bpf_cpu_time'] = vm_bpf_cpu_time_df['bpf_cpu_time'] / num_vcpus
#vm_cpu_inst_df['vm_cpu_instructions'] = vm_cpu_inst_df['vm_cpu_instructions'] / num_vcpus
#vm_cpu_cycles_df['vm_cpu_cycles'] = vm_cpu_cycles_df['vm_cpu_cycles'] / num_vcpus

### prepare vm validation data

In [None]:
# Validation features: X is the same object as vm_bpf_cpu_time_df (no copy),
# so the optional column additions below would also modify that frame.
X = vm_bpf_cpu_time_df
#X['cpu_instructions'] = vm_cpu_inst_df['vm_cpu_instructions']
#X['cpu_cycles'] = vm_cpu_cycles_df['vm_cpu_cycles']
#X['cpu_scaling_freq'] = cpu_scaling_freq_df['cpu_scaling_freq']

In [None]:
#print(bpf_cpu_time_df.shape)
#bpf_cpu_time_df.sample

In [None]:
# Bare attribute access (not called); presumably used to echo the validation features.
X.sample

In [None]:
# Bare attribute access (not called); presumably used to echo the VM power frame.
vm_pkg_joules_df.sample

### Run Validations

In [None]:
# Every trained model, keyed by display name.
trained_models = {
    "LinearRegression": lr_model,
    "PolynomialRegression": pr_model,
    "DecisionTree": dt_model,
    "RandomForest": rf_model,
    "XGBoost": gb_model,
    "SVR": svr_model,
    "LogarithmicTransformation": lt_model,
}
# Subset that is validated below.
test_models = {
    "LinearRegression": lr_model,
    "PolynomialRegression": pr_model,
    "XGBoost": gb_model,
}

# Show the validation feature and the reference signal on their own first.
plt.plot(X['bpf_cpu_time'])
plt.show()
plt.plot(vm_pkg_joules_df['vm_pkg_joules'])
plt.show()

# For each model: predict from the validation features, score the prediction
# against the measured VM package joules, and overlay both curves.
for model_name, model in test_models.items():
    # `label` names the curve; `legend` only switches the legend on.
    ax = vm_pkg_joules_df['vm_pkg_joules'].plot(label="vm_pkg_joules", legend=True)
    #ax2 = ax.twinx()

    x_test = X
    print(f"Model : {model_name}")
    if model_name == "LogarithmicTransformation":
        # That model was fitted on log-transformed features.
        x_test = np.log(X)
    y_pred = model.predict(x_test)
    #y_pred = y_pred * num_vcpus
    mse = mean_squared_error(vm_pkg_joules_df['vm_pkg_joules'], y_pred)
    r2 = r2_score(vm_pkg_joules_df['vm_pkg_joules'], y_pred)
    mape = mean_absolute_percentage_error(vm_pkg_joules_df['vm_pkg_joules'], y_pred) * 100

    print(f"Mean Squared Error (MSE)         : {mse}")
    print(f"Mean Abs percentage Error (MAPE) : {mape}")
    print(f"R^2 Score                        : {r2}")
    # Put the prediction on the reference index so correlation and plotting
    # line up sample by sample.
    df = pd.DataFrame(y_pred, index=vm_pkg_joules_df.index, columns=[model_name])
    corr = df.corrwith(vm_pkg_joules_df['vm_pkg_joules'])
    corr_value = corr[model_name]
    # ".4f" = four decimal places (":4f" only set a minimum field width).
    print(f"Correlation        : {corr_value:.4f}")

    # The column name (the model name) becomes the legend entry.
    df.plot(ax=ax, color="orange", legend=True)

    plt.legend()
    plt.show()

In [None]:
# Every trained model, keyed by display name.
trained_models = {
    "LinearRegression": lr_model,
    "PolynomialRegression": pr_model,
    "DecisionTree": dt_model,
    "RandomForest": rf_model,
    "XGBoost": gb_model,
    "SVR": svr_model,
    "LogarithmicTransformation": lt_model,
}
# Subset that is validated below.
test_models = {
    "LinearRegression": lr_model,
    "PolynomialRegression": pr_model,
    "XGBoost": gb_model,
}

# For each model: predict from the validation features, score the prediction
# against the RAPL package joules, and overlay both curves.
for model_name, model in test_models.items():
    # `label` names the curve; `legend` only switches the legend on.
    ax = rapl_pkg_joules_df['rapl_pkg_joules'].plot(label="rapl_pkg_joules", legend=True)
    #ax2 = ax.twinx()

    x_test = X
    print(f"Model : {model_name}")
    if model_name == "LogarithmicTransformation":
        # That model was fitted on log-transformed features.
        x_test = np.log(X)
    y_pred = model.predict(x_test)
    #y_pred = y_pred * num_vcpus
    mse = mean_squared_error(rapl_pkg_joules_df['rapl_pkg_joules'], y_pred)
    r2 = r2_score(rapl_pkg_joules_df['rapl_pkg_joules'], y_pred)
    mape = mean_absolute_percentage_error(rapl_pkg_joules_df['rapl_pkg_joules'], y_pred) * 100

    print(f"Mean Squared Error (MSE)         : {mse}")
    print(f"Mean Abs percentage Error (MAPE) : {mape}")
    print(f"R^2 Score                        : {r2}")
    # Put the prediction on the reference index so correlation and plotting
    # line up sample by sample.
    df = pd.DataFrame(y_pred, index=rapl_pkg_joules_df.index, columns=[model_name])
    corr = df.corrwith(rapl_pkg_joules_df['rapl_pkg_joules'])
    corr_value = corr[model_name]
    # ".4f" = four decimal places (":4f" only set a minimum field width).
    print(f"Correlation        : {corr_value:.4f}")

    # The column name (the model name) becomes the legend entry.
    df.plot(ax=ax, color="orange", legend=True)

    plt.legend()
    plt.show()