In [3]:
%load_ext autoreload
%autoreload 2

import os 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

In [6]:
# Chart titles, indexed by experiment number (the plotting functions look up
# exp_names[exp_num]). Index 6 is the case that gets wider axis limits there.
# NOTE(review): the bit strings presumably name the basis states involved in
# each experiment - confirm with the experiment definitions.
exp_names = [
    '000, 001',
    '000, 110',
    '000, 111',
    '000, 010, 100',
    '000, 011, 100',
    '000, 011, 110',
    'General Unitary',
]

def read_in_data(data_dir):
    """Load every experiment result file found directly inside ``data_dir``.

    File names are split on ``"_"``: the second field is parsed as an integer
    index and the third field marks the backend (exactly ``"ibm"`` means an
    IBM run; anything else is treated as a simulator run). The index encodes
    two things: ``index // 2`` is the experiment number and ``index % 2`` is
    the Hadamard flag.

    Inside each file, every line containing ``"% Error:"`` contributes one
    value, read from the third space-separated token of that line.

    Args:
        data_dir: Directory containing the result files. Sub-directories and
            dotfiles inside it are ignored.

    Returns:
        A list with one dict per file (in sorted file-name order) with keys
        ``"experiment_number"`` (int), ``"hadamard"`` (bool), ``"machine"``
        (bool, True for IBM) and ``"data"`` (1-D float numpy array holding
        exactly the parsed error values, in file order).

    Raises:
        IndexError: If a file name has fewer than three ``"_"``-separated
            fields, or an error line has fewer than three tokens.
        ValueError: If the index in the file name or an error value is not
            numeric.
    """
    run_data = []
    # Sorted so the result order does not depend on the filesystem.
    for data_file in sorted(os.listdir(data_dir)):
        path = os.path.join(data_dir, data_file)
        # Skip entries that are not result files: directories such as
        # Jupyter's .ipynb_checkpoints cannot be opened, and dotfiles such as
        # .DS_Store do not follow the naming scheme.
        if data_file.startswith(".") or not os.path.isfile(path):
            continue

        metadata = data_file.split("_")
        experiment_number = int(metadata[1])
        experiment_type = metadata[2] == "ibm"  # True == IBM, False == Sim

        # Collect into a list instead of a pre-sized buffer: a fixed size
        # either overflows (IndexError) or leaves spurious trailing zeros
        # whenever the file does not hold exactly the expected line count.
        with open(path, encoding="utf-8") as file:
            errors = [
                float(line.split(" ")[2])
                for line in file
                if "% Error:" in line
            ]

        run_data.append({
            "experiment_number": experiment_number // 2,
            "hadamard": bool(experiment_number % 2),
            "machine": experiment_type,
            "data": np.array(errors, dtype=float),
        })
    return run_data

def plot_experiments(run_data, output_dir):
    """Save one box-and-whisker chart per experiment for the simulator runs.

    Values are pooled per (experiment number, hadamard flag, machine flag).
    Only the simulator groups (``machine`` False) are drawn: the Hadamard
    group is labelled 'Randomized' and the non-Hadamard group 'Unrandomized'.
    Experiments with no simulator values are skipped.

    Args:
        run_data: Iterable of dicts with keys ``"experiment_number"``,
            ``"hadamard"``, ``"machine"`` and ``"data"`` (an iterable of
            error values).
        output_dir: Directory that receives
            ``Barplot_Experiment_<n>.png``; created if it does not exist.

    Returns:
        None. The charts are written to disk and the figures closed.

    Raises:
        IndexError: If an experiment number has no entry in ``exp_names``.
    """
    # savefig does not create missing directories.
    os.makedirs(output_dir, exist_ok=True)

    # Group data by (experiment number, hadamard, machine).
    grouped_data = {}
    for run in run_data:
        key = (run['experiment_number'], run['hadamard'], run['machine'])
        grouped_data.setdefault(key, []).extend(run['data'])

    # Scope the line-width override to this function instead of permanently
    # mutating the global rcParams, which would leak into every later plot.
    with plt.rc_context({'lines.linewidth': 20}):
        # Sorted for a deterministic processing order.
        for exp_num in sorted({key[0] for key in grouped_data}):
            hadamard_data = grouped_data.get((exp_num, True, False), [])
            no_hadamard_data = grouped_data.get((exp_num, False, False), [])

            data_to_plot = []
            labels = []

            if hadamard_data:
                data_to_plot.append(hadamard_data)
                labels.append('Randomized')

            if no_hadamard_data:
                data_to_plot.append(no_hadamard_data)
                labels.append('Unrandomized')

            if not data_to_plot:  # Skip if no simulator data is available
                continue

            fig, ax = plt.subplots(figsize=(4, 4))
            ax.boxplot(data_to_plot, widths=0.4)
            # Label the boxes after drawing: boxplot's `labels=` keyword is
            # deprecated in newer matplotlib, while set_xticklabels works on
            # old and new versions alike.
            ax.set_xticklabels(labels)
            ax.set_ylabel('Error Percentage')
            # Experiment 6 gets a much wider error range than the others.
            ax.set_ylim(0, 125 if exp_num == 6 else 7)
            ax.set_title(f'{exp_names[exp_num]}')
            fig.savefig(os.path.join(output_dir, f'Barplot_Experiment_{exp_num}.png'), bbox_inches='tight')
            plt.close(fig)
        
def histplots(run_data, output_dir):
    """Save one density histogram (with KDE) per experiment for simulator runs.

    Every value in every run becomes one row of a long-format DataFrame.
    For each experiment number, the rows with ``machine`` False are drawn as
    a step histogram coloured by the ``hadamard`` flag, each hue normalised
    separately. Rows with ``machine`` True are not drawn.

    Args:
        run_data: Iterable of dicts with keys ``"experiment_number"``,
            ``"hadamard"``, ``"machine"`` and ``"data"`` (an iterable of
            error values).
        output_dir: Directory that receives
            ``Histogram_Experiment_<n>.png``; created if it does not exist.

    Returns:
        None. The charts are written to disk and the figures closed. When
        ``run_data`` holds no values at all, nothing is written.

    Raises:
        IndexError: If an experiment number has no entry in ``exp_names``.
    """
    # Transform the data into long-format records (one row per value).
    records = [
        {
            "experiment_number": item["experiment_number"],
            "hadamard": item["hadamard"],
            "machine": item["machine"],
            "value": value,
        }
        for item in run_data
        for value in item["data"]
    ]
    if not records:
        # An empty DataFrame has no columns, so the column lookups below
        # would raise KeyError; there is nothing to plot anyway.
        return

    # savefig does not create missing directories.
    os.makedirs(output_dir, exist_ok=True)

    df = pd.DataFrame(records)

    # Loop-invariant: set the style once, before any figure is created.
    sns.set_style("whitegrid")

    # Create and save a separate plot for each experiment number.
    for exp_num in df["experiment_number"].unique():
        exp_data = df[df["experiment_number"] == exp_num]
        simulator_data = exp_data[~exp_data["machine"].astype(bool)]

        fig, ax = plt.subplots(figsize=(4, 4))
        sns.histplot(
            data=simulator_data,
            x="value",
            hue="hadamard",
            kde=True,
            stat="density",
            common_norm=False,
            element="step",
            linewidth=0.5,
            line_kws={'linewidth': 0.5},
            ax=ax,
        )

        # Experiment 6 spans a much wider error range than the others.
        if exp_num != 6:
            ax.set_xlim(0, 8)
            ax.set_ylim(0, 1.5)
        else:
            ax.set_xlim(0, 150)
            ax.set_ylim(0, 0.1)

        ax.set_title(f'{exp_names[exp_num]}')
        ax.set_xlabel('Error Percentage')
        ax.set_ylabel('Density')

        # Save the plot and release the figure.
        save_path = os.path.join(output_dir, f'Histogram_Experiment_{exp_num}.png')
        fig.savefig(save_path, bbox_inches='tight')
        plt.close(fig)

In [7]:
# Input and output locations, relative to the notebook's working directory.
DATA_DIR = 'data_files'
OUTPUT_DIR = 'charts'

# The plotting functions save with savefig, which fails if the target
# directory is missing (e.g. on a fresh checkout), so create it up front.
os.makedirs(OUTPUT_DIR, exist_ok=True)

run_data = read_in_data(DATA_DIR)
plot_experiments(run_data, OUTPUT_DIR)
histplots(run_data, OUTPUT_DIR)