In [3]:
import os
import pandas as pd
import numpy as np
import tensorly as tl
from tensorly.decomposition import parafac
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Step 1: Get all application names from filenames
data_dir = "./altis_power_cap_res/offline/"
csv_files = [f for f in os.listdir(data_dir) if f.endswith("_performance.csv")]

# Extract application names from filenames
applications = sorted([f.replace("_performance.csv", "") for f in csv_files])

# Step 2: Load all CSV files and add the application name
df_list = []
for file in csv_files:
    app_name = file.replace("_performance.csv", "")  # Extract application name
    file_path = os.path.join(data_dir, file)
    df = pd.read_csv(file_path)
    df["App Name"] = app_name  # Add application name column
    df_list.append(df)

# Combine all application data into one DataFrame
df = pd.concat(df_list, ignore_index=True)

# Step 3: Encode CPU Power, GPU Power, and Applications into indices
cpu_power_levels = sorted(df["CPU Power Cap"].unique())
gpu_power_levels = sorted(df["GPU Power Cap"].unique())

cpu_index_map = {power: idx for idx, power in enumerate(cpu_power_levels)}
gpu_index_map = {power: idx for idx, power in enumerate(gpu_power_levels)}
app_index_map = {app: idx for idx, app in enumerate(applications)}

# Tensor dimensions
num_cpu = len(cpu_power_levels)
num_gpu = len(gpu_power_levels)
num_apps = len(applications)

# Step 4: Initialize 3D Tensor (IPC, FLOPs)
tensor_data = np.full((num_cpu, num_gpu, num_apps, 2), np.nan)  # Use NaN for missing values
test_data = []  # Store test data separately for evaluation

# Step 5: Populate tensor with 50% data and store remaining for evaluation
for _, row in df.iterrows():
    cpu_idx = cpu_index_map[row["CPU Power Cap"]]
    gpu_idx = gpu_index_map[row["GPU Power Cap"]]
    app_idx = app_index_map[row["App Name"]]
    
    # Randomly assign 50% of data to training and 50% to test
    if np.random.rand() > 0.5:
        tensor_data[cpu_idx, gpu_idx, app_idx, 0] = row["IPS"]   # IPC values
        tensor_data[cpu_idx, gpu_idx, app_idx, 1] = row["FLOPS"] # FLOPs values
    else:
        test_data.append((cpu_idx, gpu_idx, app_idx, row["IPC"], row["FLOPS"]))

# Step 6: Apply CP Decomposition for CF prediction
rank = 3  # Number of latent factors
factors = parafac(tensor_data, rank=rank, init='random', mask=~np.isnan(tensor_data))

# Reconstruct tensor with predicted values
completed_tensor = tl.kruskal_to_tensor(factors)

# Step 7: Evaluate predictions
true_ipc, pred_ipc, true_flops, pred_flops = [], [], [], []

for (cpu_idx, gpu_idx, app_idx, ipc_true, flops_true) in test_data:
    ipc_pred = completed_tensor[cpu_idx, gpu_idx, app_idx, 0]
    flops_pred = completed_tensor[cpu_idx, gpu_idx, app_idx, 1]
    
    true_ipc.append(ipc_true)
    pred_ipc.append(ipc_pred)
    true_flops.append(flops_true)
    pred_flops.append(flops_pred)

# Compute accuracy metrics
mae_ipc = mean_absolute_error(true_ipc, pred_ipc)
rmse_ipc = np.sqrt(mean_squared_error(true_ipc, pred_ipc))
r2_ipc = r2_score(true_ipc, pred_ipc)

mae_flops = mean_absolute_error(true_flops, pred_flops)
rmse_flops = np.sqrt(mean_squared_error(true_flops, pred_flops))
r2_flops = r2_score(true_flops, pred_flops)

# Print results
print(f"IPC Prediction: MAE={mae_ipc:.4f}, RMSE={rmse_ipc:.4f}, R²={r2_ipc:.4f}")
print(f"FLOPs Prediction: MAE={mae_flops:.4f}, RMSE={rmse_flops:.4f}, R²={r2_flops:.4f}")


KeyError: 'IPC'