<a href="https://colab.research.google.com/github/vibhuverma17/MLBASEDSAMPLING/blob/main/Plotting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Location of the results workbook and the sheet to analyse.
file_path = '/content/Results.xlsx'
sheet_name = 'Parkinsons'

df = pd.read_excel(file_path, sheet_name=sheet_name)  # Load only the named sheet

# Take an explicit copy so the assignment below modifies an independent frame
# instead of a selection of df (the pattern pandas flags with SettingWithCopyWarning).
selected_columns = df[
    ['Test R2', 'Model Specs', 'Total Time', 'Sampling Time', 'Training Time (seconds)']
].copy()

# Floor negative R2 scores at 0; values that are already >= 0 are left unchanged.
selected_columns['Test R2'] = selected_columns['Test R2'].clip(lower=0)

# Last expression of the cell: the notebook displays the resulting frame.
selected_columns

In [None]:
# Set a style for prettier plots
sns.set_theme(style="whitegrid")

# Offer the columns that are actually present rather than a hard-coded list.
# 'Model Specs' is used as the x-axis label column below, so it is not a metric choice.
available_metrics = [col for col in selected_columns.columns if col != 'Model Specs']

# Default score column: keep 'Test AUC' when the frame has it, otherwise fall back
# to 'Test R2', so that pressing Enter selects a column that exists.
default_score_column = next(
    (col for col in ('Test AUC', 'Test R2') if col in selected_columns.columns),
    'Test AUC',
)
default_time_column = 'Training Time (seconds)'

# Ask the user for the columns to be plotted (empty input selects the default)
print("Please choose the metric columns for plotting:")
print(f"Available metrics: {', '.join(map(str, available_metrics))}")
test_auc_column = input(
    f"Enter the column name for the score metric (default: '{default_score_column}'): "
).strip() or default_score_column
training_time_column = input(
    f"Enter the column name for the Training Time metric (default: '{default_time_column}'): "
).strip() or default_time_column

# Check if the provided columns exist in the dataset
if test_auc_column not in selected_columns.columns or training_time_column not in selected_columns.columns:
    print(f"Error: One or both columns '{test_auc_column}' and '{training_time_column}' do not exist in the dataset.")
    print(f"Available columns: {list(selected_columns.columns)}")
else:
    # Sort by the score column (high to low); this ordering drives the x-axis
    sorted_by_auc = selected_columns.sort_values(by=test_auc_column, ascending=False)

    # Sorted by the time column (high to low). Kept available under its original
    # name, but not used for plotting: see the note on the second line below.
    sorted_by_training_time = selected_columns.sort_values(by=training_time_column, ascending=False)

    # Create a figure and axis
    fig, ax1 = plt.subplots(figsize=(10, 6))

    # Plot Model Specs vs the score column (in Blue) on the left y-axis
    sns.lineplot(data=sorted_by_auc, x='Model Specs', y=test_auc_column,
                 marker='o', color='blue', linestyle='-', markersize=8,
                 markerfacecolor='none', markeredgewidth=2, ax=ax1)
    # Title follows the chosen columns so it stays correct for any metric
    ax1.set_title(f"Model Specs vs {test_auc_column} and {training_time_column}", fontsize=14)

    # Set labels for the left y-axis
    ax1.set_xlabel(f"{sheet_name} - Model Specs", fontsize=12)
    ax1.set_ylabel(test_auc_column, fontsize=12)
    ax1.tick_params(axis='y', labelcolor='blue')
    # Rotate the tick labels the plot already produced instead of overwriting them
    # with a separately supplied list, which could drift out of sync with the ticks.
    ax1.tick_params(axis='x', labelrotation=90)

    # Make vertical grid lines dotted
    ax1.grid(axis='x', linestyle=':', color='gray')  # Dotted vertical grid lines

    # Create a second y-axis for the time column
    ax2 = ax1.twinx()

    # Plot Model Specs vs the time column (in Green) on the right y-axis.
    # The same ordered frame as the first line is used so that every point sits
    # above the 'Model Specs' label it belongs to on the shared x-axis.
    sns.lineplot(data=sorted_by_auc, x='Model Specs', y=training_time_column,
                 marker='o', color='green', linestyle='-', markersize=8,
                 markerfacecolor='none', markeredgewidth=2, ax=ax2)
    ax2.set_ylabel(training_time_column, fontsize=12)
    ax2.tick_params(axis='y', labelcolor='green')

    # Remove horizontal grid lines from the second axis
    ax2.grid(axis='y', visible=False)  # Remove horizontal grid lines from right y-axis

    # Adjust layout and spacing
    plt.tight_layout(pad=4.0)  # Adjust the padding to avoid chopping off labels
    plt.subplots_adjust(bottom=0.2)  # Increase space at the bottom to avoid overlap with x labels

    # Show the plot
    plt.show()
    print()  # Empty line
