In [2]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.decomposition import TruncatedSVD

# Define directories for training and validation data
train_dir = "./altis_power_cap_res/2_dual_cap/train"
validation_dir = "./altis_power_cap_res/2_dual_cap/validation"
save_model_dir = "./altis_power_cap_res/2_dual_cap/model"
# scaler_X = pd.read_pickle("./altis_power_cap_res/2_dual_cap/model/scaler_X.pkl")
# scaler_y = pd.read_pickle("./altis_power_cap_res/2_dual_cap/model/scaler_y.pkl")
os.makedirs(save_model_dir, exist_ok=True)


2025-02-19 23:23:12.196988: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-02-19 23:23:12.210406: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1740007392.226375 1311149 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1740007392.231236 1311149 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-02-19 23:23:12.247347: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

In [7]:
# Load CSV files from train directory
train_csv_files = [f for f in os.listdir(train_dir) if f.endswith(".csv")]
validation_csv_files = [f for f in os.listdir(validation_dir) if f.endswith(".csv")]

# Load and merge training data
train_data = []
for file in train_csv_files:
    file_path = os.path.join(train_dir, file)
    df = pd.read_csv(file_path)
    train_data.append(df)
train_df = pd.concat(train_data, ignore_index=True)

# Load validation data per application
validation_data = {}
for file in validation_csv_files:
    file_path = os.path.join(validation_dir, file)
    df = pd.read_csv(file_path)
    validation_data[file] = df

# Drop any rows with missing values
train_df.dropna(inplace=True)
for key in validation_data:
    validation_data[key].dropna(inplace=True)

# Define feature columns and target column
feature_cols = ["CPU Power Cap", "GPU Power Cap", "IPS", "Memory Throughput", "SM Clock", "DRAM Active", "FP Active"]
target_col = "Performance"

# Extract features and target for training
X_train = train_df[feature_cols].values
y_train = train_df[target_col].values.reshape(-1, 1)

# Normalize features and target
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()
X_train_scaled = scaler_X.fit_transform(X_train)
y_train_scaled = scaler_y.fit_transform(y_train)

# Build the improved MLP model
model = Sequential([
    Dense(256, activation='selu', input_shape=(X_train_scaled.shape[1],)),
    BatchNormalization(),
    Dropout(0.3),
    Dense(128, activation='selu'),
    BatchNormalization(),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='linear')
])

# Compile the model with a different optimizer
model.compile(optimizer='nadam', loss='mse', metrics=['mae'])

# Train the model
history = model.fit(X_train_scaled, y_train_scaled, epochs=50, batch_size=32, verbose=1)

# Save the trained model
model.save(os.path.join(save_model_dir, "performance_prediction_model.h5"))

# Evaluate the model for each application
for app_name, df_val in validation_data.items():
    # Sample 20% of the validation data
    df_sampled = df_val.sample(frac=1, random_state=42)  # Set random_state for reproducibility
    # print(df_sampled)
    
    # Extract features and target
    X_val = df_sampled[feature_cols].values
    y_val = df_sampled[target_col].values.reshape(-1, 1)

    # Normalize features
    X_val_scaled = scaler_X.transform(X_val)

    # Predict using the trained model
    y_pred = model.predict(X_val_scaled)

    # Reverse normalization (convert back to original scale)
    y_pred = scaler_y.inverse_transform(y_pred)
    y_val = scaler_y.inverse_transform(y_val)

    # Compute Accuracy Metrics
    mae = mean_absolute_error(y_val, y_pred)
    rmse = np.sqrt(mean_squared_error(y_val, y_pred))
    r2 = r2_score(y_val, y_pred)

    print(f"Validation Results for {app_name} (Sampled 20%):")
    print(f"MAE: {mae:.4f}, RMSE: {rmse:.4f}, R²: {r2:.4f}\n")

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
I0000 00:00:1740007551.819179 1311654 service.cc:148] XLA service 0x7fab800101e0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1740007551.819223 1311654 service.cc:156]   StreamExecutor device (0): NVIDIA A100-PCIE-40GB, Compute Capability 8.0
2025-02-19 23:25:51.908597: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1740007552.039761 1311654 cuda_dnn.cc:529] Loaded cuDNN version 90701


[1m23/44[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m0s[0m 2ms/step - loss: 1.0030 - mae: 0.7662 

I0000 00:00:1740007554.097135 1311654 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 44ms/step - loss: 0.7395 - mae: 0.6402
Epoch 2/50
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.1584 - mae: 0.3088 
Epoch 3/50
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.1068 - mae: 0.2574 
Epoch 4/50
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0849 - mae: 0.2321 
Epoch 5/50
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0625 - mae: 0.2008 
Epoch 6/50
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0543 - mae: 0.1852 
Epoch 7/50
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0480 - mae: 0.1744 
Epoch 8/50
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0409 - mae: 0.1596 
Epoch 9/50
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0373 



[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 176ms/step
Validation Results for srad_performance.csv (Sampled 20%):
MAE: 0.0800, RMSE: 0.0947, R²: -1.0695

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
Validation Results for sort_performance.csv (Sampled 20%):
MAE: 0.1300, RMSE: 0.1412, R²: -4.2333

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
Validation Results for raytracing_performance.csv (Sampled 20%):
MAE: 0.0321, RMSE: 0.0364, R²: -0.9476



In [3]:
import os
import numpy as np
import pandas as pd
from sklearn.decomposition import TruncatedSVD
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

model_path = os.path.join(save_model_dir, "performance_prediction_model.h5")  # Legacy format
# Load the model correctly
# Load the model with explicit loss function
model = tf.keras.models.load_model(model_path, custom_objects={"mse": tf.keras.losses.MeanSquaredError()})

# Load training data and create the initial performance matrix
train_csv_files = [f for f in os.listdir(train_dir) if f.endswith(".csv")]

# Dictionary to store application data
app_data = {}

# Process each CSV file (each application)
for file in train_csv_files:
    file_path = os.path.join(train_dir, file)
    app_name = file.replace("_performance.csv", "")
    
    df = pd.read_csv(file_path)
    required_columns = ["CPU Power Cap", "GPU Power Cap", "Performance"]
    df = df[required_columns]

    # Create a unique power pair column
    df["Power Pair"] = list(zip(df["CPU Power Cap"], df["GPU Power Cap"]))
    
    # Store application data
    app_data[app_name] = df[["Power Pair", "Performance"]].set_index("Power Pair")

# Combine all applications into a single 2D matrix
performance_matrix = pd.DataFrame(index=sorted(set().union(*[df.index for df in app_data.values()])),
                                  columns=sorted(app_data.keys()))


# Populate the performance matrix with training data
for app_name, df in app_data.items():
    performance_matrix[app_name] = df["Performance"]

I0000 00:00:1740007397.936568 1311149 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 38485 MB memory:  -> device: 0, name: NVIDIA A100-PCIE-40GB, pci bus id: 0000:99:00.0, compute capability: 8.0


In [21]:

# Populate the performance matrix with training data
for app_name, df in app_data.items():
    performance_matrix[app_name] = df["Performance"]

# Process each new application in the validation folder
for file in validation_csv_files:
    file_path = os.path.join(validation_dir, file)
    new_app_name = file.replace("_performance.csv", "")

    # Load validation data
    df_new = pd.read_csv(file_path)
    df_new["Power Pair"] = list(zip(df_new["CPU Power Cap"], df_new["GPU Power Cap"]))

    # Add new application to performance matrix
    performance_matrix[new_app_name] = np.nan

    # Step 3: Predict 20% of power pairs using trained NN
    df_sampled = df_new.sample(frac=0.2, random_state=42)  # Select 20% of rows
    sampled_pairs = df_sampled["Power Pair"].unique()
    
    true_values, nn_predicted_values = [], []
    
    for power_pair in sampled_pairs:
        # Extract corresponding feature values for the selected power pair
        X_sample = df_new[df_new["Power Pair"] == power_pair][feature_cols].values
        
        # Normalize features
        X_sample_scaled = scaler_X.transform(X_sample)
        
        # Predict performance
        y_pred = model.predict(X_sample_scaled)
        predicted_value = scaler_y.inverse_transform(y_pred)[0][0]
    
        # Fill the performance matrix with NN predictions
        performance_matrix.at[power_pair, new_app_name] = predicted_value
    
        # Store true values if available (for validation)
        if power_pair in df_new.set_index("Power Pair").index:
            true_values.append(df_new.loc[df_new["Power Pair"] == power_pair, "Performance"].values[0])
            nn_predicted_values.append(predicted_value)
    
    # Step 4: Compute NN Prediction Accuracy (Before CF)
    nn_mae = mean_absolute_error(true_values, nn_predicted_values)
    nn_rmse = np.sqrt(mean_squared_error(true_values, nn_predicted_values))
    nn_r2 = r2_score(true_values, nn_predicted_values)
    
    print(f"Neural Network Prediction for {new_app_name} (Sampled 20% Power Pairs):")
    print(f"MAE: {nn_mae:.4f}, RMSE: {nn_rmse:.4f}, R²: {nn_r2:.4f}")

    # Step 5: Use Collaborative Filtering (SVD + PQ) to Complete the Matrix
    imputer = SimpleImputer(strategy='mean')  # Fill missing values with column mean
    filled_matrix = imputer.fit_transform(performance_matrix)  # Ensure no NaNs

    svd = TruncatedSVD(n_components=8)
    low_rank_matrix = svd.fit_transform(filled_matrix)
    predicted_matrix = svd.inverse_transform(low_rank_matrix)

    # Step 6: Compute CF Prediction Accuracy
    # Extract the rows in df_new that exist in the performance_matrix (where CF made predictions)
    true_values = df_new.set_index("Power Pair")["Performance"]
    predicted_values = predicted_df[new_app_name].reindex(true_values.index)

    # # Remove NaN values (some power pairs may not have CF predictions)
    # mask = ~true_values.isna() & ~predicted_values.isna()
    # true_values = true_values[mask]
    # predicted_values = predicted_values[mask]

    # Compute CF Prediction Accuracy
    cf_mae = mean_absolute_error(true_values, predicted_values)
    cf_rmse = np.sqrt(mean_squared_error(true_values, predicted_values))
    cf_r2 = r2_score(true_values, predicted_values)

    print(f"\nCollaborative Filtering Prediction Accuracy for {new_app_name}:")
    print(f"MAE: {cf_mae:.4f}, RMSE: {cf_rmse:.4f}, R²: {cf_r2:.4f}")

    # # Print True vs. CF Predicted values
    # print(f"\nTrue vs. Predicted Performance for {new_app_name}:")
    # print(pd.DataFrame({"True": true_values, "Predicted": predicted_values}))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45

In [13]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Dot, Dense, Concatenate
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

validation_csv_files = [f for f in os.listdir(validation_dir) if f.endswith(".csv")]

# Populate the performance matrix with training data
for app_name, df in app_data.items():
    performance_matrix[app_name] = df["Performance"]

# Process each new application in the validation folder
for file in validation_csv_files:
    file_path = os.path.join(validation_dir, file)
    new_app_name = file.replace("_performance.csv", "")

    # Load validation data
    df_new = pd.read_csv(file_path)
    df_new["Power Pair"] = list(zip(df_new["CPU Power Cap"], df_new["GPU Power Cap"]))

    # Add new application to performance matrix
    performance_matrix[new_app_name] = np.nan

    # Step 3: Predict 20% of power pairs using trained NN
    df_sampled = df_new.sample(frac=0.2, random_state=42)  # Select 20% of rows
    sampled_pairs = df_sampled["Power Pair"].unique()
    
    true_values, nn_predicted_values = [], []
    
    for power_pair in sampled_pairs:
        # Extract corresponding feature values for the selected power pair
        X_sample = df_new[df_new["Power Pair"] == power_pair][feature_cols].values
        
        # Normalize features
        X_sample_scaled = scaler_X.transform(X_sample)
        
        # Predict performance
        y_pred = model.predict(X_sample_scaled)
        predicted_value = scaler_y.inverse_transform(y_pred)[0][0]
    
        # Fill the performance matrix with NN predictions
        performance_matrix.at[power_pair, new_app_name] = predicted_value
    
        # Store true values if available (for validation)
        if power_pair in df_new.set_index("Power Pair").index:
            true_values.append(df_new.loc[df_new["Power Pair"] == power_pair, "Performance"].values[0])
            nn_predicted_values.append(predicted_value)
    
    # Step 4: Compute NN Prediction Accuracy (Before CF)
    nn_mae = mean_absolute_error(true_values, nn_predicted_values)
    nn_rmse = np.sqrt(mean_squared_error(true_values, nn_predicted_values))
    nn_r2 = r2_score(true_values, nn_predicted_values)

    print(f"Neural Network Prediction for {new_app_name} (Sampled 20% Power Pairs):")
    print(f"MAE: {nn_mae:.4f}, RMSE: {nn_rmse:.4f}, R²: {nn_r2:.4f}")

    # Step 5: Neural Collaborative Filtering (NCF) to Complete the Matrix
    # Prepare training data (convert power pairs & apps to numeric indices)
    power_pair_map = {pair: i for i, pair in enumerate(performance_matrix.index)}
    app_map = {app: i for i, app in enumerate(performance_matrix.columns)}

    train_data = []
    train_labels = []
    
    for app in performance_matrix.columns:
        for power_pair in performance_matrix.index:
            if not np.isnan(performance_matrix.at[power_pair, app]):  # Only use observed values
                train_data.append([power_pair_map[power_pair], app_map[app]])
                train_labels.append(performance_matrix.at[power_pair, app])

    train_data = np.array(train_data)
    train_labels = np.array(train_labels)

    # Define Neural CF Model
    num_power_pairs = len(power_pair_map)
    num_apps = len(app_map)
    latent_dim = 10  # Embedding size

    input_power_pair = Input(shape=(1,))
    input_app = Input(shape=(1,))

    power_embedding = Embedding(num_power_pairs, latent_dim)(input_power_pair)
    app_embedding = Embedding(num_apps, latent_dim)(input_app)

    power_vec = Flatten()(power_embedding)
    app_vec = Flatten()(app_embedding)

    merged = Concatenate()([power_vec, app_vec])
    dense_1 = Dense(64, activation='relu')(merged)
    dense_2 = Dense(32, activation='relu')(dense_1)
    output = Dense(1, activation='linear')(dense_2)

    ncf_model = Model(inputs=[input_power_pair, input_app], outputs=output)
    ncf_model.compile(optimizer=Adam(learning_rate=0.001), loss='mse', metrics=['mae'])

    # Train the model
    ncf_model.fit([train_data[:, 0], train_data[:, 1]], train_labels, epochs=50, batch_size=32, verbose=1)

    # Step 6: Use the trained NCF model to predict missing values
    for power_pair in performance_matrix.index:
        if np.isnan(performance_matrix.at[power_pair, new_app_name]):
            power_idx = power_pair_map[power_pair]
            app_idx = app_map[new_app_name]
            pred_value = ncf_model.predict([np.array([power_idx]), np.array([app_idx])])[0][0]
            performance_matrix.at[power_pair, new_app_name] = pred_value

    # Compute CF Prediction Accuracy
    true_values = df_new.set_index("Power Pair")["Performance"]
    predicted_values = performance_matrix[new_app_name].reindex(true_values.index)

    cf_mae = mean_absolute_error(true_values, predicted_values)
    cf_rmse = np.sqrt(mean_squared_error(true_values, predicted_values))
    cf_r2 = r2_score(true_values, predicted_values)

    print(f"\nNeural CF Prediction Accuracy for {new_app_name}:")
    print(f"MAE: {cf_mae:.4f}, RMSE: {cf_rmse:.4f}, R²: {cf_r2:.4f}")

    # # Print True vs. CF Predicted values
    # print(f"\nTrue vs. Predicted Performance for {new_app_name}:")
    # print(pd.DataFrame({"True": true_values, "Predicted": predicted_values}))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50