# Feature Engineering

In [2]:
import pandas as pd
import os

# File path and naming convention
input_directory = '/Users/zxgan/FYP_Kubernetes/Dataset/feature_engineer_dataset/'
output_directory = '/Users/zxgan/FYP_Kubernetes/Dataset/Feature_Engineered/'
file_prefix = 'node_node_'
file_suffix = '_dataset.csv'
num_files = 50

# Small constant added to every pairwise-ratio denominator so a zero
# denominator cannot produce a division by zero.
_RATIO_EPSILON = 1e-5

# (label used in the output column name, source column) for the pairwise
# ratios. The order here fixes the order of the generated columns.
_RATIO_METRICS = (
    ('CPU', 'cpu_usage'),
    ('Memory', 'memory_usage'),
    ('Disk', 'disk_io'),
    ('Network', 'network_bandwidth_usage'),
    ('Temperature', 'node_temperature'),
)

# Columns for which a row-to-row difference feature is generated.
_DELTA_COLUMNS = (
    'node_cpu_usage',
    'node_memory_usage',
    'network_bandwidth_usage',
    'node_temperature',
)


def _safe_rate(numerator, denominator):
    """Return numerator / denominator, yielding NaN where denominator is 0.

    A plain division gives +/-inf for x/0, which ``fillna`` does not touch.
    Masking zero denominators to NaN first makes x/0 behave like 0/0, so the
    caller's ``fillna(0)`` handles both cases the same way.
    """
    return numerator / denominator.where(denominator != 0)


def engineer_features(data):
    """Return a copy of ``data`` with the engineered feature columns added.

    The input frame must contain the columns ``cpu_usage``, ``cpu_limit``,
    ``cpu_request``, ``memory_usage``, ``memory_limit``, ``memory_request``,
    ``node_cpu_usage``, ``node_memory_usage``, ``network_bandwidth_usage``,
    ``node_temperature`` and ``disk_io``; a missing column raises ``KeyError``.

    Added columns, in order:
      * utilization rates of usage against limit and against request,
      * ``<column>_delta`` row-to-row differences,
      * ``<A>_to_<B>_Ratio`` for every ordered pair of the five ratio metrics,
      * ``cpu_usage_peak`` / ``memory_usage_peak`` 0/1 flags.

    Every NaN present after the rate and delta steps (including NaNs that
    were already in the source columns) is replaced with 0. The input frame
    itself is not modified.
    """
    data = data.copy()

    # Resource Utilization Rates (0 after fillna when the limit is 0)
    data['cpu_utilization_rate'] = _safe_rate(data['cpu_usage'], data['cpu_limit'])
    data['memory_utilization_rate'] = _safe_rate(data['memory_usage'], data['memory_limit'])

    # Resource Request Utilization Rates (0 after fillna when the request is 0)
    data['cpu_request_utilization_rate'] = _safe_rate(data['cpu_usage'], data['cpu_request'])
    data['memory_request_utilization_rate'] = _safe_rate(data['memory_usage'], data['memory_request'])

    # Rate of Change (Delta) Across Metrics; the first row has no predecessor
    # and is NaN until the fill below.
    for column in _DELTA_COLUMNS:
        data[f'{column}_delta'] = data[column].diff()

    # Fill NaN values with 0
    data = data.fillna(0)

    # Ratios for CPU, Memory, Disk, Network, and Temperature: every metric
    # divided by every other metric.
    for numerator_label, numerator_column in _RATIO_METRICS:
        for denominator_label, denominator_column in _RATIO_METRICS:
            if numerator_column == denominator_column:
                continue
            data[f'{numerator_label}_to_{denominator_label}_Ratio'] = (
                data[numerator_column] / (data[denominator_column] + _RATIO_EPSILON)
            )

    # Peak Detection for CPU and Memory Usage: flag rows above 90% of the limit.
    # NOTE(review): with a limit of 0 any positive usage is flagged as a peak;
    # confirm whether a 0 limit means "no limit" in these datasets.
    data['cpu_usage_peak'] = (data['cpu_usage'] > 0.9 * data['cpu_limit']).astype(int)
    data['memory_usage_peak'] = (data['memory_usage'] > 0.9 * data['memory_limit']).astype(int)

    return data


# The batch run executes when this code is run as a notebook cell or a script
# (where __name__ is "__main__"); importing the module only defines the helpers.
if __name__ == "__main__":
    # Ensure output directory exists
    os.makedirs(output_directory, exist_ok=True)

    # Loop through all datasets
    for i in range(num_files):
        input_file_path = os.path.join(input_directory, f"{file_prefix}{i}{file_suffix}")
        output_file_path = os.path.join(output_directory, f"{file_prefix}{i}_feature_engineered.csv")
        print(f"Processing file: {input_file_path}")

        # Load the dataset and derive the features
        data = engineer_features(pd.read_csv(input_file_path))

        # Save the feature-engineered dataset
        data.to_csv(output_file_path, index=False)
        print(f"Feature-engineered file saved to: {output_file_path}")

    print("Processing complete for all datasets.")

Processing file: /Users/zxgan/FYP_Kubernetes/Dataset/feature_engineer_dataset/node_node_0_dataset.csv
Feature-engineered file saved to: /Users/zxgan/FYP_Kubernetes/Dataset/Feature_Engineered/node_node_0_feature_engineered.csv
Processing file: /Users/zxgan/FYP_Kubernetes/Dataset/feature_engineer_dataset/node_node_1_dataset.csv
Feature-engineered file saved to: /Users/zxgan/FYP_Kubernetes/Dataset/Feature_Engineered/node_node_1_feature_engineered.csv
Processing file: /Users/zxgan/FYP_Kubernetes/Dataset/feature_engineer_dataset/node_node_2_dataset.csv
Feature-engineered file saved to: /Users/zxgan/FYP_Kubernetes/Dataset/Feature_Engineered/node_node_2_feature_engineered.csv
Processing file: /Users/zxgan/FYP_Kubernetes/Dataset/feature_engineer_dataset/node_node_3_dataset.csv
Feature-engineered file saved to: /Users/zxgan/FYP_Kubernetes/Dataset/Feature_Engineered/node_node_3_feature_engineered.csv
Processing file: /Users/zxgan/FYP_Kubernetes/Dataset/feature_engineer_dataset/node_node_4_datase