In [24]:
import numpy as np
import tensorly as tl
from tensorly.decomposition import parafac

# Step 1: Assemble the 2x2x3 input tensor; entries without a measurement are NaN.
# Each observed entry is the average of the two numbers written in its expression.
tensor_data = np.array([
    [   # CPU 100
        [(1.5 + 200) / 2, (1.8 + 220) / 2, np.nan],               # GPU 50
        [(2.0 + 250) / 2, (2.2 + 230) / 2, np.nan],               # GPU 100
    ],
    [   # CPU 200
        [(1.6 + 180) / 2, (1.9 + 210) / 2, np.nan],               # GPU 50
        [(2.3 + 280) / 2, (2.5 + 300) / 2, (2.6 + 290) / 2],      # GPU 100
    ],
])

# Show the input before anything is filled in
print("Original Tensor (with NaNs):")
print(tensor_data)

# Step 2: Seed the missing entries with the mean of the observed ones
# (falls back to 0 when every entry is missing).
nan_mask = np.isnan(tensor_data)
mean_value = 0 if nan_mask.all() else np.nanmean(tensor_data)
tensor_filled = np.where(nan_mask, mean_value, tensor_data)  # new array; tensor_data is untouched here

# Step 3: CP factorization; the mask is True on observed entries, False on the seeded ones
mask = np.logical_not(nan_mask)
rank = min(5, tensor_data.shape[2])  # 5, or the size of the last mode if that is smaller

factors = parafac(tensor_filled, rank=rank, init='svd', mask=mask)

# Step 4: Rebuild a dense tensor from the CP factors
completed_tensor = tl.cp_to_tensor(factors)

# Step 5: Write the CP estimates into the NaN slots of tensor_data (in place);
# observed entries of tensor_data keep their original values.
np.copyto(tensor_data, completed_tensor, where=nan_mask)


# Note: this prints the full CP reconstruction, not the patched tensor_data
print("\nReconstructed Tensor (Predicted NaNs):")
print(completed_tensor)

Original Tensor (with NaNs):
[[[100.75 110.9     nan]
  [126.   116.1     nan]]

 [[ 90.8  105.95    nan]
  [141.15 151.25 146.3 ]]]

Reconstructed Tensor (Predicted NaNs):
[[[100.75000607 110.89999335 118.60554921]
  [125.99999959 116.10000045 120.50929949]]

 [[ 90.79999704 105.95000324 124.62033106]
  [141.14999793 151.25000226 146.29999822]]]


In [27]:
import os
import pandas as pd
import numpy as np
import tensorly as tl
from tensorly.decomposition import parafac, tucker
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.decomposition import TruncatedSVD
from sklearn.impute import SimpleImputer
from statistics import mean 

def cf(offline_dir="./altis_power_cap_res/offline/",
       online_dir="./altis_power_cap_res/online/",
       known_frac=0.2,
       random_state=42,
       verbose=False):
    """Score tensor completion of a combined (IPS + FLOPS) / 2 metric on online apps.

    A (CPU power cap, GPU power cap, app) tensor is built from every
    ``*_performance.csv`` file in ``offline_dir`` and divided by its largest
    value. Each file in ``online_dir`` is then appended as one more slice along
    the app axis: a random ``known_frac`` of its rows is written into the
    tensor, the rest is held out. Missing entries are filled by column-mean
    imputation followed by a truncated SVD of the (cpu*gpu, app) matrix, the
    result is passed through a Tucker decomposition, and the held-out rows are
    compared with the reconstruction.

    Online apps are processed in sorted file-name order and stay in the tensor
    for the apps that follow, so the order matters; sorting makes it stable.

    Parameters
    ----------
    offline_dir, online_dir : str
        Directories scanned for ``*_performance.csv`` files. Each CSV needs the
        columns "CPU Power Cap", "GPU Power Cap", "IPS" and "FLOPS".
    known_frac : float
        Fraction of each online app's rows revealed to the model.
    random_state : int
        Seed used both for sampling the revealed rows and for the randomized
        TruncatedSVD, so repeated calls on the same files return the same value.
    verbose : bool
        When True, print MAE / RMSE / R² for every online app.

    Returns
    -------
    float
        Mean over online apps of the RMSE (in original metric units), rounded
        to 4 decimals.

    Raises
    ------
    ValueError
        If ``offline_dir`` has no matching CSV, or an online app ends up with
        no revealed rows or no held-out rows.
    KeyError
        If an online row uses a power cap that never occurs in the offline data.
    statistics.StatisticsError
        If ``online_dir`` has no matching CSV (nothing to average).
    """
    suffix = "_performance.csv"

    # os.listdir order is arbitrary; sort so the run is reproducible.
    offline_csv_files = sorted(f for f in os.listdir(offline_dir) if f.endswith(suffix))
    online_csv_files = sorted(f for f in os.listdir(online_dir) if f.endswith(suffix))
    if not offline_csv_files:
        raise ValueError(f"No '*{suffix}' files found in offline directory {offline_dir!r}")

    def combined_metric(frame):
        """Per-row value stored in the tensor: the mean of the IPS and FLOPS columns."""
        return (frame["IPS"] + frame["FLOPS"]) / 2

    # Load offline data as (app name, frame) pairs
    offline_frames = [
        (file.replace(suffix, ""), pd.read_csv(os.path.join(offline_dir, file)))
        for file in offline_csv_files
    ]
    df_full = pd.concat([frame for _, frame in offline_frames], ignore_index=True)

    # Step 1: Encode CPU power, GPU power and applications as tensor indices.
    # The power-cap axes are fixed by the offline data only.
    cpu_power_levels = sorted(df_full["CPU Power Cap"].unique())
    gpu_power_levels = sorted(df_full["GPU Power Cap"].unique())
    offline_apps = sorted(name for name, _ in offline_frames)

    cpu_index_map = {power: idx for idx, power in enumerate(cpu_power_levels)}
    gpu_index_map = {power: idx for idx, power in enumerate(gpu_power_levels)}
    app_index_map = {app: idx for idx, app in enumerate(offline_apps)}

    def scatter(tensor, frame, app_idx, values):
        """Write one value per row of `frame` into tensor[cpu, gpu, app_idx] (later rows win)."""
        for cpu, gpu, value in zip(frame["CPU Power Cap"], frame["GPU Power Cap"], values):
            tensor[cpu_index_map[cpu], gpu_index_map[gpu], app_idx] = value

    # Step 2: Build the 3D tensor of (IPS + FLOPS) / 2; unmeasured cells stay NaN
    tensor_data = np.full(
        (len(cpu_power_levels), len(gpu_power_levels), len(offline_apps)), np.nan
    )
    for app_name, frame in offline_frames:
        scatter(tensor_data, frame, app_index_map[app_name], combined_metric(frame))

    # Step 3: Normalize by the largest offline value (online values may exceed 1)
    tensor_max = np.nanmax(tensor_data)
    tensor_data /= tensor_max

    # Step 4: Process online applications one at a time
    rmse_per_app = []
    for file in online_csv_files:
        app_name = file.replace(suffix, "")
        df_online = pd.read_csv(os.path.join(online_dir, file))

        # Grow the app axis by one all-NaN slice for this application
        tensor_data = np.pad(tensor_data, ((0, 0), (0, 0), (0, 1)), constant_values=np.nan)
        app_idx = tensor_data.shape[2] - 1

        # Reveal a random subset of rows; everything else is held out for scoring
        df_known = df_online.sample(frac=known_frac, random_state=random_state)
        df_test = df_online.drop(df_known.index)
        if df_known.empty or df_test.empty:
            # An all-NaN app column is dropped by SimpleImputer and breaks the
            # reshape below; an empty test set cannot be scored. Fail clearly.
            raise ValueError(
                f"{app_name}: need at least one known and one test row "
                f"(got {len(df_known)} known, {len(df_test)} test from {len(df_online)} rows)"
            )
        scatter(tensor_data, df_known, app_idx, combined_metric(df_known) / tensor_max)

        # Step 5: Mean-impute the NaNs, then smooth with a low-rank SVD of the
        # (cpu*gpu, app) unfolding
        tensor_2d = tensor_data.reshape(-1, tensor_data.shape[-1])
        tensor_2d_filled = SimpleImputer(strategy="mean").fit_transform(tensor_2d)
        svd = TruncatedSVD(
            n_components=min(3, tensor_2d_filled.shape[1] - 1),
            random_state=random_state,  # the default solver is randomized; seed it
        )
        low_rank_approx = svd.fit_transform(tensor_2d_filled)
        tensor_filled = svd.inverse_transform(low_rank_approx).reshape(tensor_data.shape)

        # Step 6: Tucker decomposition of the filled tensor, rank capped at 10 per mode
        ranks = [min(10, dim) for dim in tensor_data.shape]
        core, factors = tucker(tensor_filled, rank=ranks)
        completed_tensor = tl.tucker_to_tensor((core, factors))

        # Step 7: Compare held-out rows with the reconstruction (normalized units)
        true_values = (combined_metric(df_test) / tensor_max).tolist()
        predicted_values = [
            completed_tensor[cpu_index_map[cpu], gpu_index_map[gpu], app_idx]
            for cpu, gpu in zip(df_test["CPU Power Cap"], df_test["GPU Power Cap"])
        ]

        # Rescale the error back to original metric units
        rmse = np.sqrt(mean_squared_error(true_values, predicted_values)) * tensor_max
        rmse_per_app.append(rmse)

        if verbose:
            mae = mean_absolute_error(true_values, predicted_values) * tensor_max
            r2 = r2_score(true_values, predicted_values)
            print(f"Prediction Accuracy for {app_name}: MAE={mae:.4f}, RMSE={rmse:.4f}, R²={r2:.4f}")

    return round(float(mean(rmse_per_app)), 4)

In [25]:
import os
import pandas as pd
import numpy as np
import tensorly as tl
from tensorly.decomposition import tucker
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.decomposition import TruncatedSVD
from sklearn.impute import SimpleImputer
from statistics import mean

def cf(offline_dir="./altis_power_cap_res/offline/",
       online_dir="./altis_power_cap_res/online/",
       selected_combinations=None,
       random_state=42,
       verbose=False):
    """Score tensor completion of IPS and FLOPS separately on online apps.

    Two (CPU power cap, GPU power cap, app) tensors, one per metric, are built
    from every ``*_performance.csv`` file in ``offline_dir`` and each is divided
    by its own largest value. Each file in ``online_dir`` is appended as one
    more slice along the app axis: rows whose (CPU cap, GPU cap) pair is in
    ``selected_combinations`` are written into the tensors, all other rows are
    held out. Missing entries are filled by column-mean imputation followed by
    a truncated SVD of the (cpu*gpu, app) matrix, the result is passed through
    a Tucker decomposition, and the held-out rows are compared with the
    reconstruction.

    Online apps are processed in sorted file-name order and stay in the tensors
    for the apps that follow, so the order matters; sorting makes it stable.

    Parameters
    ----------
    offline_dir, online_dir : str
        Directories scanned for ``*_performance.csv`` files. Each CSV needs the
        columns "CPU Power Cap", "GPU Power Cap", "IPS" and "FLOPS".
    selected_combinations : list of (cpu_cap, gpu_cap) tuples, optional
        Power-cap pairs revealed to the model for every online app. Defaults to
        the eleven pairs listed in the body.
    random_state : int
        Seed for the randomized TruncatedSVD so repeated calls on the same
        files return the same values.
    verbose : bool
        When True, print MAE / RMSE / R² per metric for every online app.

    Returns
    -------
    tuple of (float, float)
        Mean RMSE over online apps for IPS and for FLOPS (original units), each
        rounded to 4 decimals.

    Raises
    ------
    ValueError
        If ``offline_dir`` has no matching CSV, or an online app ends up with
        no revealed rows or no held-out rows.
    KeyError
        If an online row uses a power cap that never occurs in the offline data.
    statistics.StatisticsError
        If ``online_dir`` has no matching CSV (nothing to average).
    """
    if selected_combinations is None:
        # NOTE(review): the last three pairs break the (cap, 110)/(cap, 250)
        # pattern of the first eight -- confirm (200, 120) and (540, 250) are intended.
        selected_combinations = [
            (120, 110), (120, 250), (140, 110), (140, 250),
            (160, 110), (160, 250), (180, 110), (180, 250),
            (200, 110), (200, 120), (540, 250)
        ]

    suffix = "_performance.csv"
    metrics = ("IPS", "FLOPS")

    # os.listdir order is arbitrary; sort so the run is reproducible.
    offline_csv_files = sorted(f for f in os.listdir(offline_dir) if f.endswith(suffix))
    online_csv_files = sorted(f for f in os.listdir(online_dir) if f.endswith(suffix))
    if not offline_csv_files:
        raise ValueError(f"No '*{suffix}' files found in offline directory {offline_dir!r}")

    # Load offline data as (app name, frame) pairs
    offline_frames = [
        (file.replace(suffix, ""), pd.read_csv(os.path.join(offline_dir, file)))
        for file in offline_csv_files
    ]
    df_full = pd.concat([frame for _, frame in offline_frames], ignore_index=True)

    # The power-cap axes are fixed by the offline data only.
    cpu_power_levels = sorted(df_full["CPU Power Cap"].unique())
    gpu_power_levels = sorted(df_full["GPU Power Cap"].unique())
    offline_apps = sorted(name for name, _ in offline_frames)

    cpu_index_map = {power: idx for idx, power in enumerate(cpu_power_levels)}
    gpu_index_map = {power: idx for idx, power in enumerate(gpu_power_levels)}
    app_index_map = {app: idx for idx, app in enumerate(offline_apps)}

    def scatter(tensor, frame, app_idx, values):
        """Write one value per row of `frame` into tensor[cpu, gpu, app_idx] (later rows win)."""
        for cpu, gpu, value in zip(frame["CPU Power Cap"], frame["GPU Power Cap"], values):
            tensor[cpu_index_map[cpu], gpu_index_map[gpu], app_idx] = value

    def svd_impute(tensor):
        """Mean-impute NaNs, then return a low-rank SVD reconstruction with the same shape."""
        tensor_2d = tensor.reshape(-1, tensor.shape[-1])
        tensor_2d_filled = SimpleImputer(strategy="mean").fit_transform(tensor_2d)
        svd = TruncatedSVD(
            n_components=min(3, tensor_2d_filled.shape[1] - 1),
            random_state=random_state,  # the default solver is randomized; seed it
        )
        low_rank_approx = svd.fit_transform(tensor_2d_filled)
        return svd.inverse_transform(low_rank_approx).reshape(tensor.shape)

    # One tensor per metric; unmeasured cells stay NaN
    shape = (len(cpu_power_levels), len(gpu_power_levels), len(offline_apps))
    tensors = {metric: np.full(shape, np.nan) for metric in metrics}
    for app_name, frame in offline_frames:
        for metric in metrics:
            scatter(tensors[metric], frame, app_index_map[app_name], frame[metric])

    # Normalize each metric by its own largest offline value
    scales = {metric: np.nanmax(tensors[metric]) for metric in metrics}
    for metric in metrics:
        tensors[metric] = tensors[metric] / scales[metric]

    rmse_per_metric = {metric: [] for metric in metrics}
    for file in online_csv_files:
        app_name = file.replace(suffix, "")
        df_online = pd.read_csv(os.path.join(online_dir, file))

        # Rows at the selected power-cap pairs are revealed; the rest are held out
        is_known = (
            df_online[["CPU Power Cap", "GPU Power Cap"]]
            .apply(tuple, axis=1)
            .isin(selected_combinations)
        )
        df_known = df_online[is_known]
        df_test = df_online.drop(df_known.index)
        if df_known.empty or df_test.empty:
            # An all-NaN app column is dropped by SimpleImputer and breaks the
            # reshape in svd_impute; an empty test set cannot be scored.
            raise ValueError(
                f"{app_name}: need at least one known and one test row "
                f"(got {len(df_known)} known, {len(df_test)} test from {len(df_online)} rows)"
            )

        for metric in metrics:
            # Grow the app axis by one all-NaN slice for this application
            tensor = np.pad(tensors[metric], ((0, 0), (0, 0), (0, 1)), constant_values=np.nan)
            app_idx = tensor.shape[2] - 1
            scatter(tensor, df_known, app_idx, df_known[metric] / scales[metric])
            tensors[metric] = tensor  # keep this app's known values for later apps

            # Impute, then Tucker-decompose with rank capped at 10 per mode
            ranks = [min(10, dim) for dim in tensor.shape]
            core, factors = tucker(svd_impute(tensor), rank=ranks)
            completed = tl.tucker_to_tensor((core, factors))

            # Compare held-out rows with the reconstruction (normalized units)
            true_values = (df_test[metric] / scales[metric]).tolist()
            predicted_values = [
                completed[cpu_index_map[cpu], gpu_index_map[gpu], app_idx]
                for cpu, gpu in zip(df_test["CPU Power Cap"], df_test["GPU Power Cap"])
            ]

            # Rescale the error back to original metric units
            rmse = np.sqrt(mean_squared_error(true_values, predicted_values)) * scales[metric]
            rmse_per_metric[metric].append(rmse)

            if verbose:
                mae = mean_absolute_error(true_values, predicted_values) * scales[metric]
                r2 = r2_score(true_values, predicted_values)
                print(f"{metric} Prediction Accuracy: MAE={mae:.4f}, RMSE={rmse:.4f}, R²={r2:.4f}")

    return (
        round(float(mean(rmse_per_metric["IPS"])), 4),
        round(float(mean(rmse_per_metric["FLOPS"])), 4),
    )

In [28]:
# Evaluate the current offline/online split with whichever `cf` was defined last
# in the kernel. The notebook defines `cf` twice: one version returns a single
# rounded RMSE, the other returns an (IPS, FLOPS) pair. The scalar recorded
# below presumably comes from the single-value version -- re-run after
# "Restart & Run All" to confirm which definition is active.
cf()

0.1274

In [24]:
from pathlib import Path
import itertools
import shutil

# Folder layout: the CSVs live directly in SOURCE_DIR and get shuffled into
# the two sub-folders below during the search.
SOURCE_DIR = Path("./altis_power_cap_res")
OFFLINE_DIR, ONLINE_DIR = SOURCE_DIR / "offline", SOURCE_DIR / "online"

# Create both sub-folders up front (no error if they already exist)
OFFLINE_DIR.mkdir(parents=True, exist_ok=True)
ONLINE_DIR.mkdir(parents=True, exist_ok=True)

# Snapshot of the performance CSVs currently sitting directly in SOURCE_DIR
files = [*SOURCE_DIR.glob("*_performance.csv")]

# Search state: lowest RMSE seen so far and the split(s) that achieved it
avg_rmse, best_combinations = float('inf'), []

def move_back():
    """Return every CSV in the offline folder, then the online folder, to SOURCE_DIR.

    Only ``*.csv`` entries are touched; anything else in those folders is left alone.
    """
    for folder in (OFFLINE_DIR, ONLINE_DIR):
        for csv_path in folder.glob("*.csv"):
            shutil.move(str(csv_path), str(SOURCE_DIR))

# Exhaustive search over offline/online splits.
#
# For every way of choosing OFFLINE_COUNT CSVs as the offline set (all other
# performance CSVs in SOURCE_DIR become the online set), the files are moved
# into place, cf() is evaluated, and the files are moved back. The split(s)
# with the lowest cf() value are reported at the end.
#
# cf() must return a single number here; a variant that returns a tuple makes
# the `<` comparison below raise TypeError.
OFFLINE_COUNT = 7  # number of CSVs placed in the offline folder per candidate split

if len(files) <= OFFLINE_COUNT:
    # With too few files there is nothing to split. The usual cause is an
    # interrupted earlier run that left the CSVs inside offline/ or online/,
    # which previously showed up only as a silent "inf" result.
    print(
        f"Need more than {OFFLINE_COUNT} '*_performance.csv' files in {SOURCE_DIR} "
        f"(found {len(files)}); skipping the search. If an earlier run was interrupted, "
        f"files may still be in {OFFLINE_DIR} or {ONLINE_DIR}."
    )
else:
    for combo in itertools.combinations(files, OFFLINE_COUNT):
        try:
            # Move the selected files to offline
            for file in combo:
                shutil.move(str(file), str(OFFLINE_DIR))

            # Whatever is still in SOURCE_DIR becomes the online set
            remaining_files = list(SOURCE_DIR.glob("*_performance.csv"))
            for file in remaining_files:
                shutil.move(str(file), str(ONLINE_DIR))

            # Compute RMSE for this combination
            current_rmse = cf()
        finally:
            # Always restore the layout, even if a move or cf() failed, so the
            # next combination (or the next run of this cell) starts clean.
            move_back()

        # Track the minimum RMSE; ties are kept alongside the current best
        if current_rmse < avg_rmse:
            avg_rmse = current_rmse
            best_combinations = [(list(combo), remaining_files)]
        elif current_rmse == avg_rmse:
            best_combinations.append((list(combo), remaining_files))

# Print the best RMSE and corresponding combinations
print(f"Smallest avg_rmse: {avg_rmse}")
print("Best Combinations:")

for i, (best_combo, remaining_files) in enumerate(best_combinations, 1):
    print(f"\nCombination {i}:")
    # Report the actual set sizes rather than hard-coded counts
    print(f" - Offline ({len(best_combo)} files): {[file.name for file in best_combo]}")
    print(f" - Online ({len(remaining_files)} files): {[file.name for file in remaining_files]}")


Smallest avg_rmse: inf
Best Combinations:
