In [1]:
import warnings
warnings.filterwarnings("ignore")
import os
import json
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.decomposition import PCA
import joblib
import matplotlib.pyplot as plt

In [2]:
# Name fragment used to pick the CSV of the function whose group is predicted
FUNCTION_TO_PREDICT = 'figlet'
# Folder that is scanned for CSV files
PATH_TO_CSV = 'output/generator'
# Names of the CSV files found in that folder, in ascending order
file_csv = sorted(
    entry for entry in os.listdir(PATH_TO_CSV) if entry.endswith('.csv')
)

In [3]:
# Load the CSV of the function to classify and min-max scale its columns.

# Keep only the files whose name contains the function identifier
matching_csv = [file for file in file_csv if FUNCTION_TO_PREDICT in file]
if not matching_csv:
    # Fail fast with an actionable message: an empty DataFrame would only
    # fail later, inside the scaler, with an error unrelated to the real cause.
    raise FileNotFoundError(
        f"No CSV file containing '{FUNCTION_TO_PREDICT}' found in '{PATH_TO_CSV}'"
    )

# When several files match, the last one in `file_csv` order is used.
# This is the file the previous loop ended up with, since it overwrote `df`
# on every match; the earlier matches are no longer read needlessly.
file_path = os.path.join(PATH_TO_CSV, matching_csv[-1])
df = pd.read_csv(file_path)

# Delete the 'name' column if present (errors='ignore' tolerates its absence)
df_no_name = df.drop(columns='name', errors='ignore')

# Scale every remaining column with a MinMaxScaler fitted on this file only
scaler = MinMaxScaler()
df_no_name_scaled = scaler.fit_transform(df_no_name)

# Wrap the scaled array in a DataFrame with the same column names
df_one_function_scaled = pd.DataFrame(df_no_name_scaled, columns=df_no_name.columns)

In [4]:
# Aggregate the scaled measurements per rate: mean, max and min of each group.

# Add the prefix "avg_" on a copy instead of renaming the columns in place,
# so that re-running this cell does not stack prefixes ("avg_avg_...") and
# then fail on the 'avg_rate' lookup.
df_avg = df_one_function_scaled.add_prefix('avg_')

# One grouping by rate value, reused for the three aggregations
grouped_by_rate = df_avg.groupby('avg_rate')

# Average, maximum and minimum of the values for each group with the same rate
compressed_data = grouped_by_rate.mean().reset_index()
max_data = grouped_by_rate.max().reset_index()
min_data = grouped_by_rate.min().reset_index()

# (suffix of the new column, source column in max_data / min_data).
# The order matters: it fixes the column layout of compressed_data, which is
# flattened into the feature vector right after this step.
metric_columns = [
    ('success_rate', 'avg_success_rate'),
    ('cpu_usage', 'avg_cpu_usage'),
    ('ram_usage', 'avg_ram_usage'),
    ('power', 'avg_power_usage'),
    ('overloaded', 'avg_overloaded'),
    ('medium_latency', 'avg_medium_latency'),
]

# Add all the "max_" columns first, then all the "min_" columns
for prefix, extreme_data in (('max_', max_data), ('min_', min_data)):
    for suffix, source_column in metric_columns:
        compressed_data[prefix + suffix] = extreme_data[source_column]

# The rate itself was only the grouping key and is not part of the features
compressed_data = compressed_data.drop(columns='avg_rate')

# Flatten the aggregated table row by row into a single feature vector
vector = compressed_data.to_numpy().flatten()

# Present the vector as a one-row DataFrame (one column per feature)
dataframe = pd.DataFrame(vector.reshape(1, -1))

# Project the row with the PCA model stored on disk
pca_model = joblib.load('pca_model.joblib')
pca_results = pca_model.transform(dataframe)

# Number of PCA dimensions returned by the projection
pca_dimensions = pca_results.shape[1]

# Label the projected components PC1, PC2, ... in a DataFrame
component_names = ['PC' + str(position + 1) for position in range(pca_dimensions)]
pca_df = pd.DataFrame(data=pca_results, columns=component_names)

# Keep only the leading principal components whose cumulative explained
# variance ratio first exceeds 0.95.
explained_variance = pca_model.explained_variance_ratio_

summed_variance = 0
index = 0
# Stop as soon as the cumulative variance exceeds 0.95. The bound on `index`
# keeps every component (instead of raising IndexError) when the stored
# components never reach that threshold.
while summed_variance <= 0.95 and index < len(explained_variance):
    summed_variance += explained_variance[index]
    index += 1
# Label of the last component that is kept
after_column = "PC" + str(index)

# Keep the first `index` columns (PC1 .. after_column). Positional slicing is
# used because label-based truncate() requires a sorted axis, and the labels
# stop being sorted once "PC10" exists (it compares lower than "PC2").
pca_df = pca_df.iloc[:, :index]

# Predict the group of the function with the K-Means model stored on disk
kmeans_model = joblib.load('profiling-model.joblib')
kmeans_predict = kmeans_model.predict(pca_df)

# Update group_list.json

# The group key is the predicted label(s) joined by commas
kmeans_predict_key = tuple(kmeans_predict.tolist())
str_key = ",".join(str(label) for label in kmeans_predict_key)

# Read the current content of 'group_list.json'
file_path = 'group_list.json'
with open(file_path, 'r') as file:
    group_list = json.load(file)

# Register the function under its group: create the group when it is new,
# otherwise append the function only if it is not already listed
if str_key not in group_list:
    group_list[str_key] = [FUNCTION_TO_PREDICT]
elif FUNCTION_TO_PREDICT not in group_list[str_key]:
    group_list[str_key].append(FUNCTION_TO_PREDICT)

# Write the updated content back to 'group_list.json'
with open(file_path, 'w') as file:
    json.dump(group_list, file)