In [56]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Lambda, Multiply
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.decomposition import TruncatedSVD
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Dot, Dense, Concatenate
from tensorflow.keras.optimizers import Adam
from sklearn.impute import SimpleImputer
from tensorflow.keras.layers import Add, LeakyReLU
from tensorflow.keras.activations import relu
seed = 1
import tensorflow.keras.backend as K

In [119]:
############################# Step 1: Data Preprocessing ############################# 

# Define directories
train_dir = "./altis_power_cap_res/runs/train"                 # GPU apps
train_dir_2 = "./npb_power_cap_res/runs/train"           # CPU-only apps

validation_dir = "./ecp_power_cap_res/runs/validation"         # GPU apps
validation_dir_2 = "./npb_power_cap_res/runs/validation"   # CPU-only apps

save_model_dir = "./MLP_model/moe_model/arxiv"
os.makedirs(save_model_dir, exist_ok=True)

gpu_metrics = ["SM Clock", "FP Active", "DRAM Active"]
gpu_metrics=[]
cpu_metrics = ["IPS"]
cpu_metrics = []

###################################
# -------- TRAINING DATA -------- #
###################################

train_csv_files = [f for f in os.listdir(train_dir) if f.endswith('.csv')] + \
                  [f for f in os.listdir(train_dir_2) if f.endswith('.csv')]

# train_csv_files = os.listdir(train_dir) 

train_data = []
for file in train_csv_files:
    if file.endswith(".csv"):
        if file in os.listdir(train_dir):
            file_path = os.path.join(train_dir, file)
            gpu_enabled = 1
        else:
            file_path = os.path.join(train_dir_2, file)
            gpu_enabled = 0
            
        df = pd.read_csv(file_path)
        df = df[~df["CPU Power Cap"].isin([120, 130])]
        df["gpu_enabled"] = gpu_enabled

        # Zero out GPU metrics for CPU-only apps
        # if gpu_enabled == 0:
        #     for col in gpu_metrics:
        #         if col in df.columns:
        #             df[col] = 0
        # if gpu_enabled == 1:
        #     for col in cpu_metrics:
        #         if col in df.columns:
        #             df[col] = 0
                    
        train_data.append(df)

train_df = pd.concat(train_data, ignore_index=True)
# train_df.dropna(inplace=True)

#####################################
# -------- VALIDATION DATA -------- #
#####################################

validation_csv_files = [f for f in os.listdir(validation_dir) if f.endswith('.csv')] + \
                       [f for f in os.listdir(validation_dir_2) if f.endswith('.csv')]

validation_data = {}
for file in validation_csv_files:
    if file.endswith(".csv"):
        if file in os.listdir(validation_dir):
            file_path = os.path.join(validation_dir, file)
            gpu_enabled = 1
        else:
            file_path = os.path.join(validation_dir_2, file)
            gpu_enabled = 0

        df = pd.read_csv(file_path)
        df = df[~df["CPU Power Cap"].isin([120, 130])]
        df["gpu_enabled"] = gpu_enabled

        # Zero out GPU metrics for CPU-only apps
        if gpu_enabled == 0:
            for col in gpu_metrics:
                if col in df.columns:
                    df[col] = 0

        # if gpu_enabled == 1:
        #     for col in cpu_metrics:
        #         if col in df.columns:
        #             df[col] = 0

        df.dropna(inplace=True)
        new_app_name = file.replace("_performance.csv", "")
        validation_data[new_app_name] = df

In [120]:
############################# Step 2: Build MLP Model ############################# 

# feature_cols = ["CPU Power Cap", "GPU Power Cap", "IPS", "Memory Throughput","SM Clock", "DRAM Active", "FP Active", "gpu_enabled"]

# feature_cols = ["CPU Power Cap", "GPU Power Cap", "IPS", "LLC Misses", "Memory Throughput", "SM Clock", "DRAM Active", "FP Active", "gpu_enabled"]
feature_cols = ["CPU Power Cap", "GPU Power Cap", "Memory Throughput", "SM Clock", "DRAM Active", "FP Active", "gpu_enabled"]


target_col = "Performance"

# Extract features and labels
X_train = train_df[feature_cols].values
y_train = train_df[target_col].values.reshape(-1, 1)

# Normalize input and output
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

X_train_scaled = scaler_X.fit_transform(X_train)
y_train_scaled = scaler_y.fit_transform(y_train)

input_layer = Input(shape=(7,))

############################# Step 2: Build MoE Model #############################

# Split into 7 features and the gpu flag (optional to include in gate)
features = Lambda(lambda x: x[:, :-1])(input_layer)

# ---------------- Gating Network ---------------- #
gate_hidden = Dense(16, activation='relu')(features)
gate_hidden = Dense(8, activation='relu')(gate_hidden)
gate_output = Dense(1, activation='sigmoid')(gate_hidden)  # scalar gate between 0 and 1

# ---------------- Expert 1: CPU ---------------- #
cpu_branch = Dense(256, activation='relu')(features)
cpu_branch = BatchNormalization()(cpu_branch)
cpu_branch = Dropout(0.3)(cpu_branch)
cpu_branch = Dense(128, activation='relu')(cpu_branch)
cpu_branch = BatchNormalization()(cpu_branch)
cpu_branch = Dropout(0.3)(cpu_branch)
cpu_branch = Dense(64, activation='selu')(cpu_branch)
cpu_branch = Dense(32, activation='selu')(cpu_branch)
cpu_output = Dense(1)(cpu_branch)

# ---------------- Expert 2: GPU ---------------- #
gpu_branch = Dense(256, activation='relu')(features)
gpu_branch = BatchNormalization()(gpu_branch)
gpu_branch = Dropout(0.3)(gpu_branch)
gpu_branch = Dense(128, activation='relu')(gpu_branch)
gpu_branch = BatchNormalization()(gpu_branch)
gpu_branch = Dropout(0.3)(gpu_branch)
gpu_branch = Dense(64, activation='selu')(gpu_branch)
gpu_branch = Dense(32, activation='selu')(gpu_branch)
gpu_output = Dense(1)(gpu_branch)

# ---------------- Soft Gating ---------------- #
# gate_output is scalar in (0, 1)
final_output = Add()([
    Multiply()([gate_output, gpu_output]),
    Multiply()([Lambda(lambda x: 1 - x)(gate_output), cpu_output])
])

# Compile model
model = Model(inputs=input_layer, outputs=final_output)

In [117]:
############################# Step 3: Train MLP Model #############################
# Compile the model with a different optimizer
model.compile(optimizer='nadam', loss='mse', metrics=['mae'])

# Train the model
history = model.fit(X_train_scaled, y_train_scaled, epochs=50, batch_size=32, verbose=3)

# Save the trained model
model.save(os.path.join(save_model_dir, "performance_prediction_model.h5"))

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50




In [121]:
############################# Step 4: Evaluate Model Performance ############################# 

model_path = os.path.join(save_model_dir, "performance_prediction_model.h5")
model = tf.keras.models.load_model(model_path, custom_objects={"mse": tf.keras.losses.MeanSquaredError()})

# Define feature columns and target
# feature_cols = ["CPU Power Cap", "GPU Power Cap", "IPS", "Memory Throughput", "SM Clock", "DRAM Active", "FP Active","gpu_enabled"]
# feature_cols = ["CPU Power Cap", "GPU Power Cap", "IPS", "LLC Misses", "Memory Throughput", "SM Clock", "DRAM Active", "FP Active","gpu_enabled"]
feature_cols = ["CPU Power Cap", "GPU Power Cap","Memory Throughput", "SM Clock", "DRAM Active", "FP Active","gpu_enabled"]

target_col = "Performance"

# Evaluate each validation dataset
for app_name, df_new in validation_data.items():
    df_new["Power Pair"] = list(zip(df_new["CPU Power Cap"], df_new["GPU Power Cap"]))


    df_sampled = df_new.sample(frac=0.1, random_state=1)
    sampled_pairs = df_sampled["Power Pair"].unique()

    # selected_pairs = [
    #  (200, 250), (120, 250), (200, 150), (120, 150),
    #  (160, 250), (160, 200), (140, 150), (180, 200)
    # ]
    
    # df_sampled = df_new[df_new["Power Pair"].isin(selected_pairs)]
    # sampled_pairs = df_sampled["Power Pair"].unique()
    

    true_values, nn_predicted_values, accuracy_values = [], [], []

    for power_pair in sampled_pairs:
        # Extract data for the selected power pair
        X_sample = df_new[df_new["Power Pair"] == power_pair][feature_cols].values

        # Normalize features
        X_sample_scaled = scaler_X.transform(X_sample)

        # Predict performance
        y_pred = model.predict(X_sample_scaled)
        predicted_value = scaler_y.inverse_transform(y_pred)[0][0]

        # True value
        true_value = df_new.loc[df_new["Power Pair"] == power_pair, target_col].values[0]
        true_values.append(true_value)
        nn_predicted_values.append(predicted_value)

        # Accuracy
        accuracy = 100 - (abs(true_value - predicted_value) / true_value * 100)
        accuracy_values.append(accuracy)

    # Evaluation metrics
    nn_mae = mean_absolute_error(true_values, nn_predicted_values)
    nn_rmse = np.sqrt(mean_squared_error(true_values, nn_predicted_values))
    nn_r2 = r2_score(true_values, nn_predicted_values)
    avg_accuracy = np.mean(accuracy_values)

    # Output results
    print(f"Neural Network Prediction for {app_name} (Sampled 10% Power Pairs, One-by-One Prediction):")
    print(f"MAE: {nn_mae:.4f}")
    # print(f"RMSE: {nn_rmse:.4f}")
    # print(f"R² Score: {nn_r2:.4f}")
    print(f"Avg Accuracy: {avg_accuracy:.2f}%\n")



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 616ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
Neural Network Prediction for lammps (Sampled 10% Power Pairs, One-by-One Prediction):
MAE: 0.0229
Avg Accuracy: 97.53%

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
[1m1/1[0m [

In [59]:
import os
import pandas as pd

############################# Step 5: Populate Initial Performance Matrix #############################

# Dictionary to store application performance data
app_data = {}
cpu_apps = []
gpu_apps = []

# Process each application's performance data
for file, df in zip(train_csv_files, train_data):
    app_name = file.replace("_performance.csv", "")
    
    df = df[["CPU Power Cap", "GPU Power Cap", "Performance", "gpu_enabled"]].copy()
    df["Power Pair"] = list(zip(df["CPU Power Cap"], df["GPU Power Cap"]))
    app_data[app_name] = df[["Power Pair", "Performance"]].set_index("Power Pair")
    
    # Classify app type
    if int(df["gpu_enabled"].iloc[0]) == 1:
        gpu_apps.append(app_name)
    else:
        cpu_apps.append(app_name)

# Get all power pairs from the first CPU app file
cpu_power_pairs = None
for df in train_data:
    if int(df["gpu_enabled"].iloc[0]) == 0:
        cpu_power_pairs = list(zip(df["CPU Power Cap"], df["GPU Power Cap"]))
        break

# Get all power pairs from the first GPU app file
gpu_power_pairs = None
for df in train_data:
    if int(df["gpu_enabled"].iloc[0]) == 1:
        gpu_power_pairs = list(zip(df["CPU Power Cap"], df["GPU Power Cap"]))
        break

# Initialize matrices
cpu_performance_matrix = pd.DataFrame(index=cpu_power_pairs, columns=sorted(cpu_apps))
gpu_performance_matrix = pd.DataFrame(index=gpu_power_pairs, columns=sorted(gpu_apps))

# Fill performance matrices
for app_name in cpu_apps:
    cpu_performance_matrix[app_name] = app_data[app_name]["Performance"].reindex(cpu_power_pairs)

for app_name in gpu_apps:
    gpu_performance_matrix[app_name] = app_data[app_name]["Performance"].reindex(gpu_power_pairs)


In [53]:
gpu_performance_matrix

Unnamed: 0,bfs,cfd,cfd_double,fdtd2d,gemm,gups,kmeans,lavamd,maxflops,nw,particlefilter_float,particlefilter_naive,pathfinder,raytracing,sort,srad,where
"(140, 150)",0.782628,0.788081,0.874924,0.758538,0.782371,0.970930,0.931905,0.796534,0.855049,0.762462,0.870874,0.795810,0.798586,0.825896,0.792024,0.768857,0.772229
"(140, 160)",0.782592,0.791431,0.884138,0.814710,0.789283,0.985264,0.938325,0.803309,0.850994,0.767282,0.885183,0.797192,0.795864,0.849484,0.792013,0.758693,0.787851
"(140, 170)",0.782601,0.784739,0.903143,0.879867,0.790290,0.985206,0.951275,0.803328,0.863409,0.753061,0.899890,0.798604,0.795908,0.869364,0.792001,0.775719,0.768429
"(140, 180)",0.782634,0.791383,0.903177,0.916591,0.793302,0.999919,0.951318,0.803374,0.859176,0.762479,0.915196,0.782159,0.795890,0.895556,0.792006,0.758713,0.791803
"(140, 190)",0.784872,0.788045,0.933296,0.956408,0.793329,0.999970,0.938258,0.803358,0.859219,0.757750,0.930926,0.783483,0.795895,0.912004,0.792054,0.772256,0.776107
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"(200, 210)",0.989018,0.999929,0.999995,0.999968,0.995155,0.999970,0.992744,0.996376,0.972553,0.991925,0.964263,0.995644,0.991518,0.946887,0.999934,0.971782,0.999981
"(200, 220)",0.989081,0.999899,0.999900,0.999996,0.996682,0.999964,0.992749,0.996441,0.972557,0.999962,0.981753,0.995596,0.991496,0.963250,0.999986,0.966463,0.999989
"(200, 230)",0.989075,0.999929,0.999983,0.999910,0.998309,0.999998,0.992676,0.996442,0.967225,0.976021,0.981793,0.997810,1.000000,0.971693,0.999926,0.961073,0.999980
"(200, 240)",0.992676,0.994592,0.999987,0.999956,0.995200,0.999963,0.992713,0.989401,0.967221,0.976005,1.000000,0.995652,0.999923,0.984538,0.999985,0.977409,0.999994


In [54]:
cpu_performance_matrix

Unnamed: 0,bt,cg,ep,is
"(260, 250)",0.602786,0.561582,0.620271,0.799374
"(280, 250)",0.651994,0.606457,0.656167,0.856609
"(300, 250)",0.689188,0.649918,0.671385,0.866336
"(320, 250)",0.718875,0.68372,0.691508,0.88867
"(340, 250)",0.748335,0.731627,0.725172,0.922938
"(360, 250)",0.784692,0.747155,0.767691,0.959522
"(380, 250)",0.818142,0.791795,0.794974,0.959731
"(400, 250)",0.855921,0.828531,0.818045,0.98623
"(420, 250)",0.877366,0.842954,0.848774,0.999599
"(440, 250)",0.897711,0.878788,0.856685,0.999822


In [6]:
############################# Step 6: Construct Sparse Matrix and Predict Missing Value ############################# 

# # Directories
# save_model_dir = "./altis_power_cap_res/2_dual_cap/model"
# os.makedirs(save_model_dir, exist_ok=True)

# # Results storage
# nn_results = []
# cf_results = []

# # Process each new application
# for file in validation_csv_files:
#     file_path = os.path.join(validation_dir, file)
#     new_app_name = file.replace("_performance.csv", "")

#     # Load validation data
#     df_new = pd.read_csv(file_path)
#     df_new["Power Pair"] = list(zip(df_new["CPU Power Cap"], df_new["GPU Power Cap"]))

#     # Add new application to performance matrix
#     performance_matrix[new_app_name] = np.nan

#     #############  Predict 20% of power pairs using trained NN --- Construct Sparse Matrix  ############# 
    
#     df_sampled = df_new.sample(frac=0.2, random_state=seed)  # Select 20% of rows
#     sampled_pairs = df_sampled["Power Pair"].unique()

#     true_values, nn_predicted_values = [], []
#     for power_pair in sampled_pairs:
#         X_sample = df_new[df_new["Power Pair"] == power_pair][feature_cols].values
#         X_sample_scaled = scaler_X.transform(X_sample)
        
#         # Predict performance
#         y_pred = model.predict(X_sample_scaled)
#         predicted_value = scaler_y.inverse_transform(y_pred)[0][0]
    
#         # Fill performance matrix with NN predictions
#         performance_matrix.at[power_pair, new_app_name] = predicted_value

#         # Store true and predicted values for error calculation
#         if power_pair in df_new.set_index("Power Pair").index:
#             actual = df_new.loc[df_new["Power Pair"] == power_pair, "Performance"].values[0]
#             true_values.append(actual)
#             nn_predicted_values.append(predicted_value)
    
#     ############## Compute NN Prediction Accuracy ##############
    
#     nn_mae = mean_absolute_error(true_values, nn_predicted_values)
#     nn_rmse = np.sqrt(mean_squared_error(true_values, nn_predicted_values))
#     nn_r2 = r2_score(true_values, nn_predicted_values)

#     # Percentage-Based Prediction Error
#     nn_pred_error = np.mean(np.abs((np.array(true_values) - np.array(nn_predicted_values)) / np.array(true_values))) * 100

#     nn_results.append((new_app_name, nn_mae, nn_rmse, nn_r2, nn_pred_error))

#     print(f"NN Prediction for {new_app_name}: MAE={nn_mae:.4f}, Prediction Accuracy={100 - nn_pred_error:.2f}%")

#     ##############  Train Neural CF  ##############
#     power_pair_map = {pair: i for i, pair in enumerate(performance_matrix.index)}
#     app_map = {app: i for i, app in enumerate(performance_matrix.columns)}

#     train_data = []
#     train_labels = []
    
#     for app in performance_matrix.columns:
#         for power_pair in performance_matrix.index:
#             if not np.isnan(performance_matrix.at[power_pair, app]):
#                 train_data.append([power_pair_map[power_pair], app_map[app]])
#                 train_labels.append(performance_matrix.at[power_pair, app])

#     train_data = np.array(train_data)
#     train_labels = np.array(train_labels)

#     # Define Neural CF Model
#     num_power_pairs = len(power_pair_map)
#     num_apps = len(app_map)
#     latent_dim = 10  # Embedding size

#     input_power_pair = Input(shape=(1,))
#     input_app = Input(shape=(1,))

#     power_embedding = Embedding(num_power_pairs, latent_dim)(input_power_pair)
#     app_embedding = Embedding(num_apps, latent_dim)(input_app)

#     power_vec = Flatten()(power_embedding)
#     app_vec = Flatten()(app_embedding)

#     merged = Concatenate()([power_vec, app_vec])
#     dense_1 = Dense(128, activation='selu')(merged)
#     dense_2 = Dense(64, activation='selu')(dense_1)
#     dense_3 = Dense(32, activation='selu')(dense_2)
#     output = Dense(1, activation='linear')(dense_3)

#     ncf_model = Model(inputs=[input_power_pair, input_app], outputs=output)
#     ncf_model.compile(optimizer=Adam(learning_rate=0.001), loss='mse', metrics=['mae'])
#     ncf_model.fit([train_data[:, 0], train_data[:, 1]], train_labels, epochs=25, batch_size=32, verbose=3)

#     ##############  Predict Missing Value via NCF  ##############
#     for power_pair in performance_matrix.index:
#         if np.isnan(performance_matrix.at[power_pair, new_app_name]):
#             power_idx = power_pair_map[power_pair]
#             app_idx = app_map[new_app_name]
#             pred_value = ncf_model.predict([np.array([power_idx]), np.array([app_idx])])[0][0]
#             performance_matrix.at[power_pair, new_app_name] = pred_value

#     ############### Compute CF Prediction Accuracy ##############
#     true_values = df_new.set_index("Power Pair")["Performance"]
#     predicted_values = performance_matrix[new_app_name].reindex(true_values.index)

#     cf_mae = mean_absolute_error(true_values, predicted_values)
#     cf_rmse = np.sqrt(mean_squared_error(true_values, predicted_values))
#     cf_r2 = r2_score(true_values, predicted_values)

#     # Percentage-Based Prediction Error
#     cf_pred_error = np.mean(np.abs((true_values - predicted_values) / true_values)) * 100

#     cf_results.append((new_app_name, cf_mae, cf_rmse, cf_r2, cf_pred_error))

#     print(f"NCF Prediction for {new_app_name}: MAE={cf_mae:.4f}, Prediction Accuracy={100 - cf_pred_error:.2f}%")

# # Convert results into a DataFrame and display
# nn_results_df = pd.DataFrame(nn_results, columns=["Application", "MAE", "RMSE", "R²", "Prediction Error (%)"])
# cf_results_df = pd.DataFrame(cf_results, columns=["Application", "MAE", "RMSE", "R²", "Prediction Error (%)"])

In [63]:
model_path = os.path.join(save_model_dir, "performance_prediction_model.h5")
model = tf.keras.models.load_model(model_path, custom_objects={"mse": tf.keras.losses.MeanSquaredError()})

# Unified feature set for both CPU and GPU apps
feature_cols = ["CPU Power Cap", "GPU Power Cap", "Memory Throughput", "SM Clock", "DRAM Active", "FP Active", "gpu_enabled"]
target_col = "Performance"

nn_results = []
cf_results = []

for new_app_name, df_new in validation_data.items():
    df_new["Power Pair"] = list(zip(df_new["CPU Power Cap"], df_new["GPU Power Cap"]))
    is_gpu_app = df_new["gpu_enabled"].iloc[0] == 1

    # Choose the correct performance matrix
    perf_matrix = gpu_performance_matrix if is_gpu_app else cpu_performance_matrix
    perf_matrix[new_app_name] = np.nan

    # Use unified scalers and feature columns
    X = df_new[feature_cols].values
    X_scaled = scaler_X.transform(X)

    # Predict for a subset of power pairs
    df_sampled = df_new.sample(frac=0.1, random_state=1)
    sampled_pairs = df_sampled["Power Pair"].unique()

    true_values, nn_predicted_values = [], []
    for power_pair in sampled_pairs:
        X_sample = df_new[df_new["Power Pair"] == power_pair][feature_cols].values
        X_sample_scaled = scaler_X.transform(X_sample)
        y_pred = model.predict(X_sample_scaled, verbose=0)
        predicted_value = scaler_y.inverse_transform(y_pred)[0][0]

        perf_matrix.at[power_pair, new_app_name] = predicted_value

        actual = df_new.loc[df_new["Power Pair"] == power_pair, "Performance"].values[0]
        true_values.append(actual)
        nn_predicted_values.append(predicted_value)

    nn_mae = mean_absolute_error(true_values, nn_predicted_values)
    nn_rmse = np.sqrt(mean_squared_error(true_values, nn_predicted_values))
    nn_r2 = r2_score(true_values, nn_predicted_values)
    nn_pred_error = np.mean(np.abs((np.array(true_values) - np.array(nn_predicted_values)) / np.array(true_values))) * 100
    nn_results.append((new_app_name, nn_mae, nn_rmse, nn_r2, nn_pred_error))


    # -------------------------- Train Neural CF --------------------------
    power_pair_map = {pair: i for i, pair in enumerate(perf_matrix.index)}
    app_map = {app: i for i, app in enumerate(perf_matrix.columns)}


    train_data = []
    train_labels = []
    for app in perf_matrix.columns:
        for power_pair in perf_matrix.index:
            if not np.isnan(perf_matrix.at[power_pair, app]):
                train_data.append([power_pair_map[power_pair], app_map[app]])
                train_labels.append(perf_matrix.at[power_pair, app])

    train_data = np.array(train_data)
    train_labels = np.array(train_labels)

    # Define and train CF model
    input_power_pair = Input(shape=(1,))
    input_app = Input(shape=(1,))
    latent_dim = 10
    power_embedding = Embedding(len(power_pair_map), latent_dim)(input_power_pair)
    app_embedding = Embedding(len(app_map), latent_dim)(input_app)
    power_vec = Flatten()(power_embedding)
    app_vec = Flatten()(app_embedding)
    merged = Concatenate()([power_vec, app_vec])
    dense_1 = Dense(128, activation='selu')(merged)
    dense_2 = Dense(64, activation='selu')(dense_1)
    dense_3 = Dense(32, activation='selu')(dense_2)
    output = Dense(1, activation='linear')(dense_3)

    ncf_model = Model(inputs=[input_power_pair, input_app], outputs=output)
    ncf_model.compile(optimizer=Adam(learning_rate=0.001), loss='mse', metrics=['mae'])
    ncf_model.fit([train_data[:, 0], train_data[:, 1]], train_labels, epochs=25, batch_size=32, verbose=0)

    for power_pair in perf_matrix.index:
        if np.isnan(perf_matrix.at[power_pair, new_app_name]):
            power_idx = power_pair_map[power_pair]
            app_idx = app_map[new_app_name]
            pred_value = ncf_model.predict([np.array([power_idx]), np.array([app_idx])], verbose=0)[0][0]
            perf_matrix.at[power_pair, new_app_name] = pred_value

    true_values_cf = df_new.set_index("Power Pair")["Performance"]
    predicted_values_cf = perf_matrix[new_app_name].reindex(true_values_cf.index)

    cf_mae = mean_absolute_error(true_values_cf, predicted_values_cf)
    cf_rmse = np.sqrt(mean_squared_error(true_values_cf, predicted_values_cf))
    cf_r2 = r2_score(true_values_cf, predicted_values_cf)
    cf_pred_error = np.mean(np.abs((true_values_cf - predicted_values_cf) / true_values_cf)) * 100
    cf_results.append((new_app_name, cf_mae, cf_rmse, cf_r2, cf_pred_error))

# Final results
nn_results_df = pd.DataFrame(nn_results, columns=["Application", "MAE", "RMSE", "R²", "Prediction Error (%)"])
cf_results_df = pd.DataFrame(cf_results, columns=["Application", "MAE", "RMSE", "R²", "Prediction Error (%)"])




In [64]:
# # Print stored accuracy metrics after all iterations
# print("\n=== Neural Network Prediction Accuracy ===")
# for app, mae, _, _, pred_error in nn_results:
#     print(f"{app}: MAE={mae:.4f}, Prediction Accuracy={100 - pred_error:.2f}%")

# print("\n=== Neural Collaborative Filtering Prediction Accuracy ===")
# for app, mae, _, _, pred_error in cf_results:
#     print(f"{app}: MAE={mae:.4f}, Prediction Accuracy={100 - pred_error:.2f}%")

# Convert results to dictionaries
nn_dict = {app: 100 - pred_error for app, _, _, _, pred_error in nn_results}
cf_dict = {app: 100 - pred_error for app, _, _, _, pred_error in cf_results}

# CPU-only apps
print("\n=== Prediction Accuracy (CPU-only Apps) ===")
print(f"{'App Name':<15} {'NN Accuracy':<15} {'NCF Accuracy'}")
for app in validation_data:
    if validation_data[app]["gpu_enabled"].iloc[0] == 0:
        print(f"{app:<15} {nn_dict[app]:<15.2f} {cf_dict[app]:.2f}")

# GPU apps
print("\n=== Prediction Accuracy (GPU Apps) ===")
print(f"{'App Name':<15} {'NN Accuracy':<15} {'NCF Accuracy'}")
for app in validation_data:
    if validation_data[app]["gpu_enabled"].iloc[0] == 1:
        print(f"{app:<15} {nn_dict[app]:<15.2f} {cf_dict[app]:.2f}")


=== Prediction Accuracy (CPU-only Apps) ===
App Name        NN Accuracy     NCF Accuracy
LULESH          95.64           93.38
mg              92.22           91.66
ua              90.98           93.18
ft              88.31           89.35
sp              99.28           97.89
lu              94.86           97.33
Nekbone         98.04           91.30

=== Prediction Accuracy (GPU Apps) ===
App Name        NN Accuracy     NCF Accuracy
lammps          97.92           98.01
Resnet50        98.09           96.11
sw4lite         93.45           94.09
Laghos          97.89           96.61
NAMD            96.07           93.76
miniGAN         96.51           95.28
gromacs         97.12           94.18
bert_large      95.63           92.04
UNet            96.84           96.49
XSBench         96.39           95.94


In [9]:
cpu_performance_matrix

Unnamed: 0,is,LULESH,cg,ua,miniFE,ft,sp,lu,Nekbone
"(260, 250)",0.648,0.751102,0.649054,0.650691,0.631,0.723851,0.693386,0.715833,1.01204
"(280, 250)",0.821467,0.876475,0.768282,0.807256,0.771533,0.87147,0.839209,0.815874,0.926375
"(300, 250)",0.960278,0.899113,0.933695,0.907936,0.884906,0.982691,0.927423,0.931769,1.02733
"(320, 250)",0.831476,0.924911,0.878882,0.868904,0.864651,0.928018,0.900728,0.87543,0.96404
"(340, 250)",0.753108,0.845468,0.789733,0.801694,0.781208,0.858662,0.820598,0.80442,0.923837
"(360, 250)",0.710614,0.775663,0.813928,0.770707,0.715294,0.817541,0.776649,0.781703,0.877139
"(380, 250)",0.809068,0.843137,0.837466,0.841151,0.84324,0.899039,0.852133,0.834181,0.956171
"(400, 250)",0.943592,0.901162,0.977415,0.930221,0.95057,0.997965,0.972134,0.958989,1.033735
"(420, 250)",0.510982,0.598108,0.62595,0.587324,0.532224,0.619896,0.603293,0.582488,0.714542
"(440, 250)",0.994127,0.969839,0.94434,0.997453,0.998197,1.026304,0.997041,0.947271,1.009012


In [10]:
gpu_performance_matrix

Unnamed: 0,lammps,Resnet50,sw4lite,Laghos,NAMD,miniGAN,gromacs,bert_large,UNet,CRADL,XSBench
"(140, 150)",0.796274,0.849827,0.757654,0.876382,0.776393,0.744410,0.772960,0.759710,0.772703,0.769315,0.856870
"(140, 160)",0.850724,0.937009,0.875004,0.928895,0.874289,0.840066,0.885414,0.882393,0.831552,0.872145,0.943023
"(140, 170)",0.869246,0.945818,0.862391,0.943195,0.865178,0.836595,0.908007,0.875339,0.829627,0.895789,0.961896
"(140, 180)",0.859747,0.970986,0.893666,0.983086,0.878331,0.870439,0.932822,0.902314,0.860988,0.909545,0.988296
"(140, 190)",0.775228,0.878783,0.767387,0.860052,0.799343,0.778106,0.818850,0.848630,0.828270,0.759427,0.778376
...,...,...,...,...,...,...,...,...,...,...,...
"(200, 210)",0.841840,0.884079,0.844263,0.906711,0.825621,0.798635,0.838607,0.842126,0.794043,0.820190,0.891763
"(200, 220)",0.858839,0.955748,0.888642,0.956127,0.872315,0.859132,0.902821,0.885454,0.838918,0.896803,0.954708
"(200, 230)",0.975418,0.998048,0.951522,1.062824,0.982381,0.974050,0.999608,0.983365,0.981092,0.975990,1.071594
"(200, 240)",0.877448,0.973748,0.896292,0.995141,0.884202,0.864739,0.952129,0.903062,0.856212,0.923183,0.985735


In [63]:
cpu_performance_matrix.to_csv("./prediction_res/performance_matrix_cpu.csv", index=True)

In [64]:
gpu_performance_matrix.to_csv("./prediction_res/performance_matrix_gpu.csv", index=True)