In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Read the results CSV into a DataFrame
file_path = 'updated_results_all_models.csv'
data = pd.read_csv(file_path)

# Derive `image` and `image_size` from `Image`: capture group 0 is the text
# before the `_<digits>x<digits>` token, capture group 1 is that token itself
image_parts = data['Image'].str.extract(r'(.+)_([0-9]+x[0-9]+)')
data['image'] = image_parts[0]
data['image_size'] = image_parts[1]

# Extract the pruning percentage from `Model`
def extract_pruning_percentage(model_name):
    """Return the pruning percentage encoded in a model file name.

    Names that do not contain ``'pruned_model_'`` yield 0. Otherwise the text
    after the last underscore, up to its first dot, is converted with
    ``int``. Any failure is printed and reported as ``None``.
    """
    try:
        # Guard clause: anything that is not a pruned model counts as 0%
        if 'pruned_model_' not in model_name:
            return 0
        trailing_piece = model_name.split('_')[-1]
        number_text = trailing_piece.split('.')[0]
        return int(number_text)
    except Exception as e:
        print(f"Error parsing model name {model_name}: {e}")
        return None

# Derive `pruning_percentage` from each entry of the `Model` column
model_names = data['Model']
data['pruning_percentage'] = model_names.apply(extract_pruning_percentage)

# Keep only the measurements and the two grouping keys
columns_to_keep = ['Time (s)', 'Memory (MB)', 'image_size', 'pruning_percentage']
data = data[columns_to_keep]

# Average time and memory per (pruning percentage, image size) pair
group_keys = ['pruning_percentage', 'image_size']
grouped_data = data.groupby(group_keys).mean().reset_index()

# Rows of the unpruned model (0% pruning), indexed by image size so they can
# be looked up per size
is_unpruned = grouped_data['pruning_percentage'] == 0
baseline = grouped_data[is_unpruned].set_index('image_size')

# Function to calculate percentage change compared to baseline
def calculate_percentage_change(row, baseline, metric):
    """Return the percentage change of ``row[metric]`` against its baseline.

    Args:
        row: Mapping-like record (for example a DataFrame row) that provides
            ``'image_size'`` and the ``metric`` value.
        baseline: DataFrame indexed by image size that holds the reference
            value of ``metric`` for each size.
        metric: Name of the column to compare.

    Returns:
        ``(row[metric] - baseline_value) / baseline_value * 100``, or
        ``np.nan`` when the image size has no baseline row or the baseline
        value is zero.
    """
    image_size = row['image_size']
    # An image size without a baseline row cannot be compared. Report NaN,
    # as for a zero baseline, instead of letting a KeyError abort the whole
    # row-wise `apply`.
    if image_size not in baseline.index:
        return np.nan
    baseline_value = baseline.loc[image_size, metric]
    if baseline_value == 0:
        # Division by zero: the relative change is undefined
        return np.nan
    return (row[metric] - baseline_value) / baseline_value * 100

# Add one percentage-change column per measured metric, computed row by row
# against the baseline
change_columns = (
    ('Time (s)', 'Time_Change_mean (%)'),
    ('Memory (MB)', 'Memory_Change_mean (%)'),
)
for source_metric, change_column in change_columns:
    grouped_data[change_column] = grouped_data.apply(
        calculate_percentage_change, axis=1, args=(baseline, source_metric)
    )

# Keep only the rows whose pruning percentage is at most 25
within_limit = grouped_data['pruning_percentage'] <= 25
filtered_data = grouped_data[within_limit]

# Show the resulting table
print(filtered_data)

# Plotting function for percentage changes
def plot_efficiency(data, metric_mean, ylabel, title, filename):
    """Draw one line per image size, save the chart as a PDF and show it.

    Args:
        data: Table with ``pruning_percentage``, ``image_size`` and the
            ``metric_mean`` column.
        metric_mean: Name of the column plotted on the y axis.
        ylabel: Label for the y axis.
        title: Chart title.
        filename: Path the PDF is written to.
    """
    plt.figure(figsize=(14, 8))
    # seaborn draws on the current axes and returns them; label that object
    axes = sns.lineplot(
        x='pruning_percentage',
        y=metric_mean,
        hue='image_size',
        marker='o',
        data=data,
    )
    axes.set_title(title)
    axes.set_xlabel('Pruning Percentage')
    axes.set_ylabel(ylabel)
    axes.legend(title='Image Size')
    axes.grid(True)
    plt.savefig(filename, format='pdf')
    plt.show()

# One chart per metric: (column to plot, y-axis label, title, output file)
plot_specs = [
    (
        'Time_Change_mean (%)',
        'Percentage Change in Time (%)',
        'Time Efficiency for Different Pruning Percentages',
        'time_efficiency.pdf',
    ),
    (
        'Memory_Change_mean (%)',
        'Percentage Change in Memory (%)',
        'Memory Efficiency for Different Pruning Percentages',
        'memory_efficiency.pdf',
    ),
]
for metric_column, axis_label, chart_title, output_file in plot_specs:
    plot_efficiency(filtered_data, metric_column, axis_label, chart_title, output_file)


In [None]:
import pandas as pd

# Read the results CSV into a DataFrame
file_path = 'updated_results_all_models.csv'
data = pd.read_csv(file_path)

# Split `Image` with a two-group pattern and store each capture group under
# its own column name (`image`, then `image_size`)
image_fields = data['Image'].str.extract(r'(.+)_([0-9]+x[0-9]+)')
for group_index, column_name in enumerate(('image', 'image_size')):
    data[column_name] = image_fields[group_index]

# Extract the pruning percentage from `Model`
def extract_pruning_percentage(model_name):
    """Parse the pruning percentage out of a model name.

    Returns 0 when ``'pruned_model_'`` is absent from the name, the integer
    found after the last underscore (before the first following dot) when it
    is present, and ``None`` after printing a message when parsing fails.
    """
    try:
        is_pruned = 'pruned_model_' in model_name
        if is_pruned:
            percentage = int(model_name.split('_')[-1].split('.')[0])
        else:
            percentage = 0
    except Exception as e:
        print(f"Error parsing model name {model_name}: {e}")
        percentage = None
    return percentage

# Derive `pruning_percentage` from each entry of the `Model` column
data['pruning_percentage'] = data['Model'].apply(extract_pruning_percentage)

# Keep only the measurements and the two grouping keys
columns_to_keep = ['Time (s)', 'Memory (MB)', 'image_size', 'pruning_percentage']
data = data[columns_to_keep]

# Average time and memory per (pruning percentage, image size) pair
grouping = data.groupby(['pruning_percentage', 'image_size'])
grouped_data = grouping.mean().reset_index()

# Rows that belong to the original model (0% pruning)
unpruned_mask = grouped_data['pruning_percentage'] == 0
baseline = grouped_data[unpruned_mask]

# Report the 0%-pruning averages for every image size present in the baseline
for image_size in baseline['image_size'].unique():
    # Select this size's rows once and reuse them for both metrics
    size_rows = baseline[baseline['image_size'] == image_size]
    avg_time = size_rows['Time (s)'].mean()
    avg_memory = size_rows['Memory (MB)'].mean()
    print(f"Image Size: {image_size}")
    print(f"  Average Time for 0% Pruning: {avg_time:.2f} seconds")
    print(f"  Average Memory for 0% Pruning: {avg_memory:.2f} MB")
