In [None]:
import os
from typing import Optional

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from matplotlib.ticker import ScalarFormatter

In [None]:
# Accumulator list, initially empty (labelled as a list of pandas dataframes).
all_results = list()
# Thread counts looked up per result folder: the powers of two from 1 to 32.
num_threads = [2 ** exponent for exponent in range(6)]

In [None]:
# Root directory of the results, given relative to the current working directory.
final_folder = 'final_results'
# Names of every entry (files and directories alike) directly inside final_folder.
# This is a one-time snapshot taken when the cell runs.
all_result_folders = os.listdir(final_folder)

## Helper methods

In [None]:
def is_concurrent_perf(file_path:str)-> bool:
    """
    Tell whether a file name denotes a concurrent performance result file.

    The name qualifies when it begins with ``concurrent`` and ends with ``_perf.txt``.
    :param file_path: file name to test
    :return: True if both the prefix and the suffix match, False otherwise
    """
    has_prefix = file_path.startswith('concurrent')
    has_suffix = file_path.endswith('_perf.txt')
    return has_prefix and has_suffix

def is_independent_perf(file_path:str)-> bool:
    """
    Tell whether a file name denotes an independent performance result file.

    The name qualifies when it begins with ``independent`` and ends with ``_perf.txt``.
    :param file_path: file name to test
    :return: True if both the prefix and the suffix match, False otherwise
    """
    has_prefix = file_path.startswith('independent')
    has_suffix = file_path.endswith('_perf.txt')
    return has_prefix and has_suffix

def is_hyper(file_path: str)-> bool:
    """
    Tell whether a name refers to a hyper(-threading) performance result.

    :param file_path: file or folder name to test
    :return: True if the substring ``hyper`` occurs anywhere in the name, False otherwise
    """
    return file_path.find('hyper') != -1

def is_numa_0(file_path: str)-> bool:
    """
    Tell whether a name refers to a numa 0 performance result.

    :param file_path: file or folder name to test
    :return: True if the substring ``numa_0`` occurs anywhere in the name, False otherwise
    """
    return file_path.find('numa_0') != -1

def is_no_affinity(file_path: str)-> bool:
    """
    Tell whether a name refers to a no affinity performance result.

    :param file_path: file or folder name to test
    :return: True if the substring ``no_affinity`` occurs anywhere in the name, False otherwise
    """
    return file_path.find('no_affinity') != -1

def is_multi_numa(file_path: str)-> bool:
    """
    Tell whether a name refers to a multi numa performance result.

    :param file_path: file or folder name to test
    :return: True if the substring ``multi_numa`` occurs anywhere in the name, False otherwise
    """
    return file_path.find('multi_numa') != -1

def get_affinity_setting(file_path: str)-> str:
    """
    Map a file or folder name to the affinity setting encoded in it.

    The result is ``'hyper_threading'``, ``'numa_0'`` or ``'multi_numa'``; for a
    no-affinity name an empty string is returned instead of a label.
    :param file_path: file or folder name to inspect
    :return: affinity setting, or ``''`` for the no-affinity case
    :raises ValueError: if none of the known settings is recognised in the name
    """
    # Evaluated top to bottom; the first predicate that matches decides the result.
    ordered_checks = (
        (is_hyper, 'hyper_threading'),
        (is_numa_0, 'numa_0'),
        (is_multi_numa, 'multi_numa'),
        (is_no_affinity, ''),
    )
    for matches, setting in ordered_checks:
        if matches(file_path):
            return setting
    raise ValueError('Unknown affinity setting')

def get_affinity_setting_title(file_path: str)-> str:
    """
    Map a file or folder name to a displayable affinity-setting label.

    The result is one of ``'hyper_threading'``, ``'numa_0'``, ``'multi_numa'``
    or ``'no_affinity'``.
    :param file_path: file or folder name to inspect
    :return: affinity setting label
    :raises ValueError: if none of the known settings is recognised in the name
    """
    # Evaluated top to bottom; the first predicate that matches decides the result.
    ordered_checks = (
        (is_hyper, 'hyper_threading'),
        (is_numa_0, 'numa_0'),
        (is_multi_numa, 'multi_numa'),
        (is_no_affinity, 'no_affinity'),
    )
    for matches, label in ordered_checks:
        if matches(file_path):
            return label
    raise ValueError('Unknown affinity setting')


In [None]:
def get_all_perf_result_folders():
    """
    List the performance result folders found in the results directory.

    Works on the module-level ``all_result_folders`` listing and keeps an entry
    only if it is a directory under ``final_folder`` and its name starts with ``perf_``.
    :return: list of performance result folder names, in listing order
    """
    return [
        name
        for name in all_result_folders
        # directories only, and only those whose name starts with "perf_"
        if os.path.isdir(os.path.join(final_folder, name)) and name.startswith('perf_')
    ]

## Code 

In [None]:
def read_value_key(line: str)-> tuple[str, int, Optional[float]]:
    """
    Parse one counter line into its key, count and optional trailing percentage.

    The line is split on whitespace: the first token is the count and the second
    token is the key. When more than two tokens are present, the last token is
    parsed as a percentage after stripping any surrounding ``(``, ``)`` and ``%``.

    :param line: one line of text of the form ``<count> <key> [... (<pct>%)]``
    :return: tuple ``(key, count, percentage)``; ``percentage`` is None when the
        line consists of exactly two tokens
    :raises ValueError: if the line has fewer than two tokens, the count is not an
        integer, or the trailing token is not a number
    """
    words = line.split()
    # Reject blank or truncated lines with ValueError (instead of the IndexError
    # that indexing would give), so callers that skip malformed lines via
    # ``except ValueError`` handle them too.
    if len(words) < 2:
        raise ValueError(f"expected '<count> <key>' but got {line!r}")

    count, key = words[0], words[1]
    # The last word is the percentage; drop the parentheses and the percent sign.
    percentage = float(words[-1].strip("()%")) if len(words) > 2 else None

    return key, int(count), percentage


In [None]:
def read_perf_file(current_folder: str, perf_txt_file: str,
                   n_header_lines: int = 5, n_counter_lines: int = 11)-> pd.DataFrame:
    """
    Read a perf result text file into a DataFrame of counters.

    The file ``<final_folder>/<current_folder>/<perf_txt_file>`` is opened, the
    first ``n_header_lines`` lines are skipped and each of the following
    ``n_counter_lines`` lines is parsed with ``read_value_key``. Lines that cannot
    be parsed (including blank lines and lines past the end of the file) are
    reported with a printed message and left out of the result.

    :param current_folder: name of the result folder inside ``final_folder``
    :param perf_txt_file: name of the text file inside that folder
    :param n_header_lines: number of leading lines to skip before the counters
    :param n_counter_lines: number of lines to parse as counters
    :return: DataFrame with the columns ``key``, ``count`` and ``percentage``,
        one row per successfully parsed line
    """
    columns = ['key', 'count', 'percentage']
    current_file_path = os.path.join(final_folder, current_folder, perf_txt_file)

    # Collect rows in a plain list and build the frame once at the end,
    # rather than growing a DataFrame one row at a time.
    records = []
    with open(current_file_path, 'r') as read_file:
        for _ in range(n_header_lines):
            read_file.readline()

        for i in range(n_counter_lines):
            line = read_file.readline()
            try:
                records.append(read_value_key(line))
            # IndexError is caught as well: a blank line or end of file yields
            # no tokens, and must not abort the whole file.
            except (ValueError, IndexError) as e:
                print(f"Error reading line {i}: {e}")

    return pd.DataFrame(records, columns=columns)

In [None]:
# Collect the names of the perf result folders ...
all_perf_results_folders = get_all_perf_result_folders()
# ... and show them as the cell output.
all_perf_results_folders

In [None]:
# Convert the independent perf text files of every perf result folder to CSV.
# Only folders whose name starts with 'perf_' are considered.
all_perf_results_folders = get_all_perf_result_folders()
for perf_result_folder in all_perf_results_folders:

    # Go into the folder and list everything it contains
    folder_path = os.path.join(final_folder, perf_result_folder)
    all_txt_files = os.listdir(folder_path)
    # Split the listing by file name into concurrent and independent perf result files
    concurrent_txt_files = [file for file in all_txt_files if is_concurrent_perf(file)]
    indpendent_txt_files = [file for file in all_txt_files if is_independent_perf(file)]

    # NOTE(review): concurrent_txt_files is collected but its conversion loop below is
    # commented out, so this cell writes CSVs for the independent files only.
    # get_data_for_key also reads '<par_method>..._perf.csv' for 'concurrent'
    # (see create_bar_chart_all) -- confirm those CSVs are produced elsewhere.
    # for concurrent_txt_file in concurrent_txt_files:
    #     # Read the file and store it in a dataframe
    #     df = read_perf_file(perf_result_folder, concurrent_txt_file)
    #     current_file = os.path.join(final_folder, perf_result_folder) + '/' + concurrent_txt_file.replace(".txt", ".csv")
    #     df.to_csv(current_file)
    for indpendent_txt_file in indpendent_txt_files:
        # Read the file and store it in a dataframe
        df = read_perf_file(perf_result_folder, indpendent_txt_file)
        # print("ind_txt:" , indpendent_txt_file)
        
        # The CSV is written next to the text file, with ".txt" replaced by ".csv"
        current_csv_file_name = indpendent_txt_file.replace(".txt", ".csv")
        full_csv_save_path = os.path.join(folder_path, current_csv_file_name)
        print("full_csv_path:", full_csv_save_path, sep="\n")
        df.to_csv(full_csv_save_path)


In [None]:
def get_data_for_key(folder: str, par_method: str, key: str) -> list[int]:
    """
    Collect the ``count`` recorded for ``key`` at every thread count in ``num_threads``.

    For each thread count the CSV
    ``final_results/<folder>/<par_method>[_<affinity>]_<threads>_perf.csv`` is read
    and the ``count`` of the first row whose ``key`` column equals ``key`` is taken.
    The affinity part comes from ``get_affinity_setting(folder)`` and is left out
    entirely when that returns an empty string.

    :param folder: name of the result folder under ``final_results``
    :param par_method: file-name prefix selecting the method, e.g. ``'independent'``
    :param key: value of the ``key`` column to look up
    :return: one count per entry of ``num_threads``, in the same order
    """
    setting = get_affinity_setting(folder)
    # An empty setting means the file names carry no affinity part at all.
    affinity = f"_{setting}" if setting != '' else ''

    counts = []
    for threads in num_threads:
        csv_path = f"final_results/{folder}/{par_method}{affinity}_{threads}_perf.csv"
        frame = pd.read_csv(csv_path)
        matching_counts = frame.loc[frame['key'] == key, "count"]
        counts.append(matching_counts.values[0])
    return counts

In [None]:
# Perf result folder names in ascending (lexicographic) order, shown as the cell output.
all_perf_results_folders = sorted(get_all_perf_result_folders())
all_perf_results_folders

In [None]:

def create_bar_chart(key: str, par_method: str, yticks: list = None, save: bool = False) -> tuple[plt.Figure, plt.Axes]:
    """
    Draw a 2x2 grid of bar charts of one counter versus thread count.

    One panel is drawn per folder in ``all_perf_results_folders[:-1]``; the i-th
    folder goes to grid row ``i // 2``, column ``i % 2``. Each panel shows the
    values returned by ``get_data_for_key`` for the thread counts in ``num_threads``.

    :param key: counter to plot; also used as the y-axis label and in the title
    :param par_method: method whose results are plotted, e.g. ``'concurrent'``
    :param yticks: explicit y tick positions applied to every panel, or None to
        keep matplotlib's automatic ticks
    :param save: if True, write the figure to
        ``final_results/figs/<par_method>_<key>_bar_chart.png``
    :return: the figure and the 2x2 array of axes created by ``plt.subplots``
    """
    fig, ax = plt.subplots(figsize=(10, 6), nrows=2, ncols=2)
    fig.tight_layout(pad=3.0)
    fig.suptitle(f'{par_method.capitalize()} - {key} for Different Affinity Settings')
    bar_colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
    x_positions = range(len(num_threads))

    # The last entry of the module-level folder list is skipped.
    # NOTE(review): presumably so the remaining folders fit the 2x2 grid -- confirm.
    for i, perf_result_folder in enumerate(all_perf_results_folders[:-1]):
        row, col = divmod(i, 2)
        panel = ax[row, col]
        data = get_data_for_key(perf_result_folder, par_method, key)

        panel.set_title(f"{get_affinity_setting_title(perf_result_folder)}")
        panel.bar(x_positions, data, color=bar_colors)
        # Bars sit at 0..n-1 and are labelled with the actual thread counts.
        panel.set_xticks(x_positions)
        panel.set_xticklabels(num_threads)

        if yticks is not None:
            panel.set_yticks(yticks)

        # Axis labels only on the bottom row / left column to avoid clutter.
        if row == 1:
            panel.set_xlabel('# of Threads')
        if col == 0:
            panel.set_ylabel(key)

    if save:
        out_dir = 'final_results/figs'
        # Create the output directory on demand so saving works on a fresh checkout.
        os.makedirs(out_dir, exist_ok=True)
        # Save this figure explicitly rather than pyplot's implicit current figure.
        fig.savefig(f'{out_dir}/{par_method}_{key}_bar_chart.png')

    return fig, ax
        

In [None]:
def get_row_col_beside(i: int) -> tuple[int, int]:
    """
    Convert a flat index into a (row, column) pair for a grid that is two columns wide.

    :param i: flat index
    :return: ``(i // 2, i % 2)``
    """
    return divmod(i, 2)
def get_row_col_below(i: int) -> tuple[int, int]:
    """
    Convert a flat index into a (row, column) pair, with both coordinates shifted by two.

    :param i: flat index
    :return: ``(i // 2 + 2, i % 2 + 2)``
    """
    base_row, base_col = divmod(i, 2)
    return base_row + 2, base_col + 2

In [None]:
def create_bar_chart_all(key: str,  yticks: list = None, save: bool = False) -> tuple[plt.Figure, plt.Axes]:
    """
    Draw independent and concurrent bar charts of one counter for every affinity setting.

    A 2x4 grid is created. For the i-th folder in ``all_perf_results_folders[:-1]``
    the independent data is drawn at grid row ``i // 4``, column ``i % 4`` and the
    concurrent data in the row directly below it. Both panels show the values
    returned by ``get_data_for_key`` for the thread counts in ``num_threads`` and
    use scientific notation on the y axis.

    :param key: counter to plot; also used as the y-axis label and in the title
    :param yticks: explicit y tick positions applied to every panel, or None to
        keep matplotlib's automatic ticks
    :param save: if True, write the figure to
        ``final_results/figs/<key>_bar_chart_all.png``
    :return: the figure and the 2x4 array of axes created by ``plt.subplots``
    """
    fig, ax = plt.subplots(figsize=(12, 7), nrows=2, ncols=4)
    fig.suptitle(f'{key} for Different Affinity Settings', fontsize=16)
    bar_colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
    x_positions = range(len(num_threads))

    # The last entry of the module-level folder list is skipped.
    # NOTE(review): presumably so the remaining folders fit the four columns -- confirm.
    for i, perf_result_folder in enumerate(all_perf_results_folders[:-1]):
        row, col = divmod(i, 4)
        top, bottom = ax[row, col], ax[row + 1, col]

        independent_data = get_data_for_key(perf_result_folder, 'independent', key)
        concurrent_data = get_data_for_key(perf_result_folder, 'concurrent', key)

        top.set_title(f"Independent \n {get_affinity_setting_title(perf_result_folder)}")
        bottom.set_title("Concurrent")

        for panel, data in ((top, independent_data), (bottom, concurrent_data)):
            panel.bar(x_positions, data, color=bar_colors)
            # Bars sit at 0..n-1 and are labelled with the actual thread counts.
            panel.set_xticks(x_positions)
            panel.set_xticklabels(num_threads)

            # Apply 10^ notation to the y-axis. Each axis gets its own formatter:
            # a formatter is bound to a single axis, so one instance must not be
            # shared between panels.
            formatter = ScalarFormatter(useMathText=True)
            formatter.set_scientific(True)
            formatter.set_powerlimits((-1, 1))
            panel.yaxis.set_major_formatter(formatter)

            if yticks is not None:
                panel.set_yticks(yticks)
            # y label only on the left-most column to avoid clutter.
            if col == 0:
                panel.set_ylabel(key)

        bottom.set_xlabel('# of Threads')

    # Save once, after every panel has been drawn (not once per loop iteration).
    if save:
        out_dir = 'final_results/figs'
        # Create the output directory on demand so saving works on a fresh checkout.
        os.makedirs(out_dir, exist_ok=True)
        fig.savefig(f'{out_dir}/{key}_bar_chart_all.png')

    return fig, ax
        

In [None]:
# Y ticks every 5e8, from 0 up to 3.5e9.
yticks = [step * 1e8 for step in range(0, 36, 5)]
fig, ax = create_bar_chart_all(key='cache-misses', yticks=yticks, save=True)

In [None]:
# Y ticks every 5e7, from 0 up to 2.5e8.
yticks = [step * 1e7 for step in range(0, 30, 5)]
fig, ax = create_bar_chart_all(key='dTLB-load-misses', yticks=yticks, save=True)

In [None]:
# Y ticks every 2e6, from 0 up to 1.4e7.
yticks = [step * 1e6 for step in range(0, 15, 2)]
fig, ax = create_bar_chart_all(key='context-switches', yticks=yticks, save=True)

## Figures

### Instructions and Cycles

In [None]:
# Concurrent runs: cycles, with y ticks every 2e11 from 0 up to 1.6e12.
key: str = 'cycles'
yticks = [step * 1e11 for step in range(0, 18, 2)]
fig, ax = create_bar_chart(key=key, par_method='concurrent', yticks=yticks, save=True)

In [None]:
# Concurrent runs: instructions, with y ticks every 5e10 from 0 up to 4.5e11.
yticks = [step * 1e10 for step in range(0, 50, 5)]
fig, ax = create_bar_chart(key='instructions', par_method='concurrent', yticks=yticks, save=True)

### Cache Misses

In [None]:
# Concurrent runs: cache misses, with y ticks every 5e8 from 0 up to 3.5e9.
yticks = [step * 1e8 for step in range(0, 40, 5)]
fig, ax = create_bar_chart(key='cache-misses', par_method='concurrent', yticks=yticks, save=True)

In [None]:
# Independent runs: cache misses, with y ticks every 5e8 from 0 up to 3.5e9.
yticks = [step * 1e8 for step in range(0, 40, 5)]
fig, ax = create_bar_chart(key='cache-misses', par_method='independent', yticks=yticks, save=True)

### iTLB-load-misses

In [None]:
# Concurrent runs: iTLB load misses, with y ticks every 2e7 from 0 up to 1e8.
key = 'iTLB-load-misses'
yticks = [step * 1e7 for step in range(0, 12, 2)]
fig, ax = create_bar_chart(key=key, par_method='concurrent', yticks=yticks, save=True)


In [None]:
# Independent runs: iTLB load misses, with y ticks every 2e7 from 0 up to 1e8.
key = 'iTLB-load-misses'
yticks = [step * 1e7 for step in range(0, 12, 2)]
fig, ax = create_bar_chart(key=key, par_method='independent', yticks=yticks, save=True)

### dTLB-load-misses

In [None]:
# Concurrent runs: dTLB load misses, with y ticks every 5e7 from 0 up to 2.5e8.
key = 'dTLB-load-misses'
yticks = [step * 1e7 for step in range(0, 30, 5)]
fig, ax = create_bar_chart(key=key, par_method='concurrent', yticks=yticks, save=True)

In [None]:
# Independent runs: dTLB load misses, with y ticks every 5e7 from 0 up to 2.5e8.
key = 'dTLB-load-misses'
yticks = [step * 1e7 for step in range(0, 30, 5)]
fig, ax = create_bar_chart(key=key, par_method='independent', yticks=yticks, save=True)

### Context Switches

In [None]:
# Concurrent runs: context switches, with y ticks every 2e6 from 0 up to 1.4e7.
key = 'context-switches'
yticks = [step * 1e6 for step in range(0, 15, 2)]
fig, ax = create_bar_chart(key=key, par_method='concurrent', yticks=yticks, save=True)

In [None]:
# Independent runs: context switches, with y ticks every 2e6 from 0 up to 1.4e7.
key = 'context-switches'
yticks = [step * 1e6 for step in range(0, 15, 2)]
fig, ax = create_bar_chart(key=key, par_method='independent', yticks=yticks, save=True)

## Independent Figures

In [None]:
# Method name handed to create_bar_chart by the plotting cells that follow.
par_method = 'independent'

In [None]:
# Cycles for the selected par_method, with y ticks every 5e10 from 0 up to 4.5e11.
key: str = 'cycles'
yticks = [step * 1e10 for step in range(0, 50, 5)]
fig, ax = create_bar_chart(key=key, par_method=par_method, yticks=yticks, save=True)

In [None]:
# Instructions for the selected par_method, with y ticks every 5e10 from 0 up to 3e11.
key: str = 'instructions'
yticks = [step * 1e10 for step in range(0, 31, 5)]
fig, ax = create_bar_chart(key=key, par_method=par_method, yticks=yticks, save=True)

In [None]:
# Cache misses for the selected par_method, with y ticks every 1e8 from 0 up to 8e8.
key: str = 'cache-misses'
yticks = [step * 1e7 for step in range(0, 81, 10)]
fig, ax = create_bar_chart(key=key, par_method=par_method, yticks=yticks, save=True)

In [None]:
# Context switches for the selected par_method, with y ticks every 2e5 from 0 up to 1.4e6.
key: str = 'context-switches'
yticks = [step * 1e5 for step in range(0, 16, 2)]
fig, ax = create_bar_chart(key=key, par_method=par_method, yticks=yticks, save=True)

In [None]:
# iTLB load misses for the selected par_method, with y ticks every 2e6 from 0 up to 1.4e7.
key: str = 'iTLB-load-misses'
yticks = [step * 1e6 for step in range(0, 16, 2)]
fig, ax = create_bar_chart(key=key, par_method=par_method, yticks=yticks, save=True)

In [None]:
# dTLB load misses for the selected par_method, with y ticks every 2e7 from 0 up to 1.4e8.
key: str = 'dTLB-load-misses'
yticks = [step * 1e7 for step in range(0, 16, 2)]
fig, ax = create_bar_chart(key=key, par_method=par_method, yticks=yticks, save=True)