In [3]:
%load_ext autoreload
%autoreload 2

import os 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

In [22]:
# Plot titles, indexed by the `experiment_number` stored in each run record
# (the number in the data file name, integer-divided by two).
# NOTE(review): entries 7 and 8 are identical - confirm that is intended.
exp_names = [
    r'Nonzero $x_{000}, x_{001}$',            # 0
    r'Nonzero $x_{000}, x_{110}$',            # 1
    r'Nonzero $x_{000}, x_{111}$',            # 2
    r'Nonzero $x_{000}, x_{010}, x_{100}$',   # 3
    r'Nonzero $x_{000}, x_{011}, x_{100}$',   # 4
    r'Nonzero $x_{000}, x_{011}, x_{110}$',   # 5
    'General Unitary',                        # 6
    r'Nonzero $x_{0000}, x_{1111}$',          # 7
    r'Nonzero $x_{0000}, x_{1111}$',          # 8
    r'Nonzero $x_{011}, x_{100}$',            # 9
    r'Nonzero $x_{00000}, x_{11111}$',        # 10
    r'Nonzero $x_{110}, x_{001}$',            # 11
]

def read_in_data(data_dir):
    """Parse every experiment log in ``data_dir`` into a list of run records.

    File names are split on ``_`` after the extension is removed; the second
    token must be an integer file number and the third token names the
    backend, e.g. ``experiment_12_simulator.txt``. Directories and files whose
    names do not fit that pattern are skipped.

    Within a file, every line containing ``"Fidelity:"`` contributes one
    sample: the second space-separated token, converted with ``float``.

    Args:
        data_dir: Path of the directory holding the log files.

    Returns:
        A list with one dict per parsed file, in sorted file-name order:
            "experiment_number": file number // 2
            "hadamard":          True when the file number is odd
            "machine":           True when the backend token is "ibm"
            "data":              1-D ``np.ndarray`` of the parsed samples

    Raises:
        ValueError: if a "Fidelity:" line's second token is not a float.
        IndexError: if a "Fidelity:" line has no second token.
    """
    run_data = []
    # Sorted so the result does not depend on the filesystem's listing order.
    for data_file in sorted(os.listdir(data_dir)):
        file_path = os.path.join(data_dir, data_file)
        if not os.path.isfile(file_path):
            # e.g. the .ipynb_checkpoints directory Jupyter may create
            continue

        # Drop the extension first so "experiment_3_ibm.txt" yields the token
        # "ibm" rather than "ibm.txt".
        metadata = os.path.splitext(data_file)[0].split("_")
        if len(metadata) < 3:
            continue
        try:
            file_number = int(metadata[1])
        except ValueError:
            # Not one of our logs (e.g. .DS_Store, notes_about_runs.txt).
            continue
        is_ibm = (metadata[2] == "ibm")  # True == IBM, False == Sim

        # NOTE(review): this one file is rescaled by 1/4; the reason is not
        # visible in this notebook - confirm before changing.
        divisor = 4 if data_file == "experiment_12_simulator.txt" else 1

        curr_data = []
        with open(file_path, encoding="utf-8") as file:
            for line in file:
                if "Fidelity:" in line:
                    curr_data.append(float(line.split(" ")[1]) / divisor)

        run_data.append({
            "experiment_number": file_number // 2,
            "hadamard": bool(file_number % 2),
            "machine": is_ibm,
            "data": np.array(curr_data)
        })
    return run_data

def plot_experiments(run_data, output_dir):
    """Save one box-and-whisker plot of simulator samples per experiment.

    Samples are pooled per (experiment_number, hadamard, machine). Only the
    simulator groups (``machine`` False) are drawn: ``hadamard`` True is
    labelled 'Randomized' and ``hadamard`` False 'Unrandomized'. NaN samples
    are dropped before plotting, and an experiment with no simulator samples
    left produces no file.

    Args:
        run_data: Iterable of dicts with the keys "experiment_number",
            "hadamard", "machine" and "data" (an iterable of floats).
        output_dir: Directory for the SVG files; created if it is missing.
            Files are named ``Barplot_Experiment_<experiment_number>.svg``.

    Raises:
        IndexError: if an experiment number has no entry in ``exp_names``.
    """
    tex_fonts = {
        # LaTeX rendering is off; Matplotlib's own text layout is used.
        "text.usetex": False,
        "font.family": "serif",
        "axes.labelsize": 11,
        "font.size": 11,
        "legend.fontsize": 8,
        "xtick.labelsize": 8,
        "ytick.labelsize": 8
    }

    # NOTE(review): both rcParams writes are process-wide and stay in effect
    # after this function returns, including the very large default line width.
    plt.rcParams.update(tex_fonts)
    plt.rcParams['lines.linewidth'] = 20  # Sets the line width

    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Pool the samples of all runs that share experiment, randomization and backend.
    grouped_data = {}
    for run in run_data:
        key = (run['experiment_number'], run['hadamard'], run['machine'])
        grouped_data.setdefault(key, []).extend(run['data'])

    for exp_num in sorted({key[0] for key in grouped_data}):
        data_to_plot = []
        labels = []
        for label, hadamard in (('Randomized', True), ('Unrandomized', False)):
            samples = grouped_data.get((exp_num, hadamard, False), [])
            # A single NaN would turn every statistic of the box into NaN.
            samples = [value for value in samples if not np.isnan(value)]
            if samples:
                data_to_plot.append(samples)
                labels.append(label)

        if not data_to_plot:  # Skip if no data is available
            continue

        fig, ax = plt.subplots(figsize=(4, 4))
        ax.boxplot(data_to_plot, widths=0.4, showfliers=False)
        # Label the boxes through the axis rather than boxplot's `labels`
        # keyword, which newer Matplotlib releases deprecate; boxes sit at
        # x = 1..N by default.
        ax.set_xticks(list(range(1, len(labels) + 1)))
        ax.set_xticklabels(labels)
        ax.set_ylabel('Fidelity')
        ax.set_yscale("log")
        ax.set_title(f'{exp_names[exp_num]}')

        save_path = os.path.join(output_dir, f'Barplot_Experiment_{exp_num}.svg')
        fig.savefig(save_path, format='svg', bbox_inches='tight')
        plt.close(fig)

        
def histplots(run_data, output_dir):
    """Save one density histogram (with KDE) of simulator samples per experiment.

    Every sample of every run becomes one row of a long-format DataFrame. For
    each experiment number the simulator rows (``machine`` False) are drawn
    with ``hadamard`` as the hue, and dashed vertical lines mark mean values.
    Experiments without simulator rows are skipped; empty input is a no-op.

    Args:
        run_data: Iterable of dicts with the keys "experiment_number",
            "hadamard", "machine" and "data" (an iterable of floats).
        output_dir: Directory for the SVG files; created if it is missing.
            Files are named ``Histogram_Experiment_<experiment_number>.svg``.

    Raises:
        IndexError: if an experiment number has no entry in ``exp_names``.
    """
    # Transforming the data into a long-format DataFrame (one row per sample)
    records = [
        {
            "experiment_number": item["experiment_number"],
            "hadamard": item["hadamard"],
            "machine": item["machine"],
            "value": d
        }
        for item in run_data
        for d in item["data"]
    ]
    if not records:
        # An empty frame has no columns, so the lookups below would raise KeyError.
        return
    df = pd.DataFrame(records)

    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    tex_fonts = {
        # LaTeX rendering is off; Matplotlib's own text layout is used.
        "text.usetex": False,
        "font.family": "serif",
        "axes.labelsize": 11,
        "font.size": 11,
        "legend.fontsize": 8,
        "xtick.labelsize": 8,
        "ytick.labelsize": 8
    }
    # Loop-invariant, so applied once before any axes are created.
    plt.rcParams.update(tex_fonts)

    # Colours for the mean lines, keyed by the `hadamard` flag.
    palette = sns.color_palette()
    hadamard_colors = {False: palette[0], True: palette[1]}

    # Creating and saving separate plots for each experiment_number using Seaborn
    for exp_num in df["experiment_number"].unique():
        # Filtering data for the current experiment number
        exp_data = df[df["experiment_number"] == exp_num]
        sim_data = exp_data[~exp_data["machine"].astype(bool)]
        if sim_data.empty:
            continue

        fig, ax = plt.subplots(figsize=(4, 4))

        # Plotting histogram for non-machine data
        sns.histplot(
            data=sim_data,
            x="value",
            hue="hadamard",
            kde=True,
            stat="density",
            common_norm=False,
            element="step",
            linewidth=0.5,
            line_kws={'linewidth': 0.5},
            ax=ax,
        )

        # Adding vertical lines for mean values.
        # NOTE(review): the means cover every run of the experiment, not only
        # the simulator rows drawn above - confirm that is intended.
        if exp_num != 6:
            ax.set_ylim(0, 1.5)
            mean_h = exp_data[exp_data["hadamard"] == True]["value"].mean()
            mean_nh = exp_data[exp_data["hadamard"] == False]["value"].mean()
            ax.axvline(x=mean_h, color=hadamard_colors[True], linestyle='--', linewidth=1)
            ax.axvline(x=mean_nh, color=hadamard_colors[False], linestyle='--', linewidth=1)
        else:
            # Experiment 6 ('General Unitary' in exp_names) gets wider axes and
            # a single mean line over all of its samples.
            ax.set_xlim(0, 40)
            ax.set_ylim(0, 0.1)
            mean = exp_data["value"].mean()
            ax.axvline(x=mean, color=hadamard_colors[False], linestyle='--', linewidth=1)

        ax.set_title(exp_names[exp_num])
        ax.set_xlabel('Error Percentage')
        ax.set_ylabel('Density')
        legend = ax.get_legend()
        if legend is not None:  # no legend exists when seaborn drew no hue entries
            legend.set_title("Randomized")

        # Saving the plot
        save_path = os.path.join(output_dir, f'Histogram_Experiment_{exp_num}.svg')
        fig.savefig(save_path, format='svg', bbox_inches='tight')
        plt.close(fig)

In [23]:
# Input and output locations, relative to the notebook's working directory.
DATA_DIR = 'data_files'
CHART_DIR = "charts"

# Parse the logs once, then render both chart families from the same records.
run_data = read_in_data(DATA_DIR)
plot_experiments(run_data, CHART_DIR)
histplots(run_data, CHART_DIR)

In [None]:
# Flatten the run records into long format: one row per individual sample,
# carrying the metadata of the run it came from.
data_list = [
    {
        "experiment_number": run["experiment_number"],
        "hadamard": run["hadamard"],
        "machine": run["machine"],
        "value": sample,
    }
    for run in run_data
    for sample in run["data"]
]

df = pd.DataFrame(data_list)

In [None]:
# Mean sample value for experiments 0-5, printed as two lines per experiment:
# first the hadamard == True rows, then the hadamard == False rows.
for exp_num in range(6):
    exp_rows = df[df["experiment_number"] == exp_num]
    for randomized in (True, False):
        print(exp_rows[exp_rows["hadamard"] == randomized]["value"].mean())

# Experiment 6 is reported as a single mean over all of its rows.
print(df[df["experiment_number"] == 6]["value"].mean())

0.5414458570958331
2.854306208429214
0.016041394615921037
3.2981800179377583
0.0318327149142543
2.0084791649812734
2.1207517808686407
2.701250907588058
1.932170403872651
2.53440956720008
3.3658415366323227
3.6777118400598137
21.011618133002514
