The candidate values for the optimal threshold `S` are: `10`, `11`, `12`, `13`, `14`.

In [1]:
import os
from tqdm import tqdm
import pandas as pd
import random

from sort_functions.sort_functions import PythonSortFunctions, CSortFunctions

# Shared instances of the project's Python and C sort implementations;
# the analysis wrapper functions call their `sort` / `hybrid_sort` methods.
PSF = PythonSortFunctions()
CSF = CSortFunctions()

In [2]:
def generate_numbers(order_type: str, max_power: int = 6) -> list:
    """Build the integers 1..10**max_power in the requested order.

    Args:
        order_type: One of ``"ascending"``, ``"descending"`` or ``"random"``.
        max_power: Non-negative exponent; the list holds ``10**max_power``
            elements. Defaults to 6 (one million elements).

    Returns:
        A new list containing every integer from 1 to ``10**max_power``
        exactly once, arranged according to ``order_type``.

    Raises:
        ValueError: If ``order_type`` is not one of the supported values.
            (Previously an unknown value silently returned ``None``.)
    """
    numbers = list(range(1, 10**max_power + 1))

    if order_type == "ascending":
        return numbers

    if order_type == "descending":
        return numbers[::-1]

    if order_type == "random":
        # `numbers` is local to this call, so it can be shuffled in place.
        random.shuffle(numbers)
        return numbers

    raise ValueError(
        f"Unknown order_type {order_type!r}; "
        "expected 'ascending', 'descending' or 'random'"
    )

In [3]:
# One input list per ordering; together they form the benchmark scenarios.
# NOTE(review): no random seed is set in the visible cells, so the "random"
# ordering presumably differs between runs — confirm whether that is intended.
best_case, worst_case, average_case = (
    generate_numbers(order) for order in ("ascending", "descending", "random")
)

scenarios = [best_case, worst_case, average_case]

In [None]:
# Python Analysis
def hybrid_sort_python_analysis(array: list, S: int) -> list:
    """Run the Python hybrid sort on ``array`` using ``S`` as the threshold.

    Delegates to ``PSF.hybrid_sort`` with ``do_not_return_sorted_data=True``
    and passes its result through unchanged; the analysis loops unpack that
    result as a (key comparisons, sort time) pair.
    """
    metrics = PSF.hybrid_sort(
        array=array,
        threshold=S,
        do_not_return_sorted_data=True,
    )
    return metrics

def merge_sort_python_analysis(array: list) -> list:
    """Run the Python merge sort on ``array``.

    Delegates to ``PSF.sort`` with ``option="merge"`` and
    ``do_not_return_sorted_data=True`` and passes its result through
    unchanged; the analysis loops unpack that result as a
    (key comparisons, sort time) pair.
    """
    metrics = PSF.sort(
        array=array,
        option="merge",
        do_not_return_sorted_data=True,
    )
    return metrics

# C Analysis
def hybrid_sort_c_analysis(array: list, S: int) -> list:
    """Run the C hybrid sort on ``array`` using ``S`` as the threshold.

    Delegates to ``CSF.hybrid_sort`` with ``do_not_return_sorted_data=True``
    and passes its result through unchanged; the analysis loops unpack that
    result as a (key comparisons, sort time) pair.
    """
    metrics = CSF.hybrid_sort(
        array=array,
        threshold=S,
        do_not_return_sorted_data=True,
    )
    return metrics

def merge_sort_c_analysis(array: list) -> list:
    """Run the C merge sort on ``array``.

    Delegates to ``CSF.sort`` with ``option="merge"`` and
    ``do_not_return_sorted_data=True`` and passes its result through
    unchanged; the analysis loops unpack that result as a
    (key comparisons, sort time) pair.
    """
    metrics = CSF.sort(
        array=array,
        option="merge",
        do_not_return_sorted_data=True,
    )
    return metrics

In [4]:
# Merge Sort
def merge_sort_analysis(scenarios: list) -> pd.DataFrame:
    """Benchmark plain merge sort in Python and in C, averaged over scenarios.

    Args:
        scenarios: Non-empty list of input lists. Each one is sorted once by
            the Python implementation and once by the C implementation.

    Returns:
        A one-row DataFrame with the columns ``python_avg_key_comparisons``,
        ``python_avg_sort_time``, ``c_avg_key_comparisons`` and
        ``c_avg_sort_time``; each value is the arithmetic mean over
        ``scenarios``.

    Raises:
        ValueError: If ``scenarios`` is empty, since the averages would be
            undefined (this used to surface as a ZeroDivisionError).
    """
    scenario_count = len(scenarios)
    if scenario_count == 0:
        raise ValueError("scenarios must contain at least one array")

    python_comparisons = []
    python_times = []
    c_comparisons = []
    c_times = []

    for arr in tqdm(scenarios):
        # Give every run its own copy of the input. Whether the sort
        # implementations reorder their argument in place is not visible
        # here; if they do, sharing one list would hand already-sorted data
        # to the second run and to every later analysis of `scenarios`.
        py_key_comp, py_sort_time = merge_sort_python_analysis(list(arr))
        c_key_comp, c_sort_time = merge_sort_c_analysis(list(arr))

        python_comparisons.append(py_key_comp)
        python_times.append(py_sort_time)
        c_comparisons.append(c_key_comp)
        c_times.append(c_sort_time)

    return pd.DataFrame([{
        "python_avg_key_comparisons": sum(python_comparisons) / scenario_count,
        "python_avg_sort_time": sum(python_times) / scenario_count,
        "c_avg_key_comparisons": sum(c_comparisons) / scenario_count,
        "c_avg_sort_time": sum(c_times) / scenario_count,
    }])


# Hybrid Sort
def hybrid_sort_analysis(scenarios: list, s_options: list = None) -> pd.DataFrame:
    """Benchmark hybrid sort in Python and in C for several thresholds ``S``.

    Args:
        scenarios: Non-empty list of input lists. For every threshold, each
            one is sorted once by the Python implementation and once by the
            C implementation.
        s_options: Threshold values to evaluate. Defaults to
            ``[10, 11, 12, 13, 14]`` when omitted.

    Returns:
        A DataFrame with one row per threshold and the columns ``S``,
        ``python_avg_key_comparisons``, ``python_avg_sort_time``,
        ``c_avg_key_comparisons`` and ``c_avg_sort_time``; each average is
        the arithmetic mean over ``scenarios``.

    Raises:
        ValueError: If ``scenarios`` is empty, since the averages would be
            undefined (this used to surface as a ZeroDivisionError).
    """
    if s_options is None:
        s_options = [10, 11, 12, 13, 14]

    scenario_count = len(scenarios)
    if scenario_count == 0:
        raise ValueError("scenarios must contain at least one array")

    records = []

    for S in tqdm(s_options):

        print(f"Running for S = {S}")

        python_comparisons = []
        python_times = []
        c_comparisons = []
        c_times = []

        for arr in scenarios:
            # Give every run its own copy of the input. Whether the sort
            # implementations reorder their argument in place is not visible
            # here; if they do, sharing one list would mean every run after
            # the first measures already-sorted data.
            py_key_comp, py_sort_time = hybrid_sort_python_analysis(list(arr), S)
            c_key_comp, c_sort_time = hybrid_sort_c_analysis(list(arr), S)

            python_comparisons.append(py_key_comp)
            python_times.append(py_sort_time)
            c_comparisons.append(c_key_comp)
            c_times.append(c_sort_time)

        records.append({
            "S": S,
            "python_avg_key_comparisons": sum(python_comparisons) / scenario_count,
            "python_avg_sort_time": sum(python_times) / scenario_count,
            "c_avg_key_comparisons": sum(c_comparisons) / scenario_count,
            "c_avg_sort_time": sum(c_times) / scenario_count,
        })

    # Build the frame once from the collected records.
    return pd.DataFrame(records)

In [6]:
# Run both benchmarks over the prepared scenarios.
msa = merge_sort_analysis(scenarios)
hsa = hybrid_sort_analysis(scenarios)

base_path = os.path.join("data_storage", "final_comparison")

# exist_ok=True creates the directory tree only when it is missing, without a
# separate existence check that could race with another process.
os.makedirs(base_path, exist_ok=True)

# Persist the results so later cells or notebooks can reuse them.
msa.to_csv(os.path.join(base_path, "merge_sort_analysis.csv"), index=False)
hsa.to_csv(os.path.join(base_path, "hybrid_sort_analysis.csv"), index=False)

100%|██████████| 3/3 [00:18<00:00,  6.08s/it]
  0%|          | 0/5 [00:00<?, ?it/s]

Running for S = 10


 20%|██        | 1/5 [00:15<01:02, 15.62s/it]

Running for S = 11


 40%|████      | 2/5 [00:30<00:45, 15.19s/it]

Running for S = 12


 60%|██████    | 3/5 [00:45<00:29, 14.93s/it]

Running for S = 13


 80%|████████  | 4/5 [00:59<00:14, 14.77s/it]

Running for S = 14


100%|██████████| 5/5 [01:14<00:00, 14.85s/it]


In [8]:
# For each language, take the threshold from the row whose average sort time
# is smallest (single row/column lookup instead of chained indexing).
p_Sval = int(hsa.loc[hsa["python_avg_sort_time"].idxmin(), "S"])
c_Sval = int(hsa.loc[hsa["c_avg_sort_time"].idxmin(), "S"])

print(f"Best S value for Python: {p_Sval}")
print(f"Best S value for C: {c_Sval}")

Best S value for Python: 13
Best S value for C: 11


In [9]:
# Final threshold: midpoint of the best Python and best C thresholds,
# rounded down by the integer division.
S = (p_Sval + c_Sval) // 2
print(f"Final S value: {S}")

Final S value: 12


The final `S` value is `12`: the midpoint (rounded down) between the best threshold for Python (`13`) and the best threshold for C (`11`).