In [11]:
# Example code for creating a figure of suitable size
# for inclusion in the term paper.

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import pandasql as ps
import matplotlib as mpl


# Global matplotlib text settings shared by every figure in this notebook.
plt.rcParams.update({
    'axes.titlesize': 15,
    'axes.labelsize': 12,
    'xtick.labelsize': 10,
    'ytick.labelsize': 10,
    'legend.fontsize': 9,
    'text.usetex': False,
})

# Colour cycle intended for colour-blind readers.
# Source: https://gist.github.com/thriveth/8560036
CB_color_cycle = [
    '#377eb8', '#ff7f00', '#4daf4a',
    '#f781bf', '#a65628', '#984ea3',
    '#999999', '#e41a1c', '#dede00',
]

# Install the palette as the default colour cycle for all axes.
mpl.rcParams['axes.prop_cycle'] = mpl.cycler(color=CB_color_cycle)

In [12]:
# Figures are sized from nominal millimetre dimensions (84 mm wide by default)
# and converted to the inches that matplotlib expects.
def new_figure(height=55, width=84, scale=2.5):
    """Return a new figure sized from nominal millimetre dimensions.

    matplotlib takes ``figsize`` in inches, and one inch is 25.4 mm. The
    previous implementation divided by 10.16, which is 25.4 / 2.5, so figures
    were rendered 2.5 times larger than the nominal size. That enlargement is
    kept as the default ``scale`` so existing plots look the same; pass
    ``scale=1`` to get a figure whose physical size is exactly
    ``width`` x ``height`` millimetres.

    Parameters
    ----------
    height : float
        Nominal figure height in mm.
    width : float
        Nominal figure width in mm.
    scale : float
        Enlargement factor applied to both dimensions.

    Returns
    -------
    matplotlib.figure.Figure
        The newly created figure (it also becomes pyplot's current figure).
    """
    mm_per_inch = 25.4
    return plt.figure(figsize=(width * scale / mm_per_inch,
                               height * scale / mm_per_inch))

# x = range(10)
# y = [v**2 - 1 for v in x]

# fig = new_figure()
# ax = fig.add_subplot(1, 1, 1)
# ax.plot(x, y, 'o-', label='$y=x^2-1$')
# ax.legend()
# ax.set_xlabel('$x$')
# ax.set_ylabel('$y$')
# fig.savefig('sample_plot.pdf', bbox_inches='tight')
# plt.show()

### Reading all the different CSVs produced by each run of the time_it function 

In [13]:
# merge_sort_already_sorted= pd.read_csv("merge_sort_already_sorted.csv")
# merge_sort_random= pd.read_csv("merge_sort_random.csv")
# merge_sort_reverse_sorted= pd.read_csv("merge_sort_reverse_sorted.csv")

# quick_sort_random= pd.read_csv("quick_sort_random.csv")
# quick_sort_already_sorted_till_2560= pd.read_csv("quick_sort_sorted.csv")
# quick_sort_reverse_sorted_till_2560= pd.read_csv("quick_sort_reverse_sorted.csv")

# heap_sort_already_sorted= pd.read_csv("heap_sort_already_sorted.csv")
# heap_sort_random= pd.read_csv("heap_sort_random.csv")
# heap_sort_reverse_sorted= pd.read_csv("heap_sort_reverse_sorted.csv")

# numpy_sort_already_sorted= pd.read_csv("numpy_sort_already_sorted.csv")
# numpy_sort_random= pd.read_csv("numpy_sort_random.csv")
# numpy_sort_reverse_sorted= pd.read_csv("numpy_sort_reverse_sorted.csv")

# python_sorted_already_sorted= pd.read_csv("sorted_already_sorted.csv")
# python_sorted_random= pd.read_csv("sorted_random.csv")
# python_sorted_reverse_sorted= pd.read_csv("sorted_reverse_sorted.csv")         


In [14]:
# dataset=pd.concat([merge_sort_already_sorted, merge_sort_random, merge_sort_reverse_sorted, 
#                    quick_sort_random,quick_sort_already_sorted_till_2560,quick_sort_reverse_sorted_till_2560,
#                    heap_sort_already_sorted, heap_sort_random, heap_sort_reverse_sorted,
#                    numpy_sort_already_sorted, numpy_sort_random, numpy_sort_reverse_sorted,
#                    python_sorted_already_sorted, python_sorted_random, python_sorted_reverse_sorted])


# dataset.loc[ dataset['Data_Type_or_List_type']=='already_sorted', ['Data_Type_or_List_type'] ] ='sorted'
# dataset.loc[ dataset['Data_Type_or_List_type']=='reverese_sorted', ['Data_Type_or_List_type'] ] ='reverse_sorted'
# dataset.loc[ dataset['Data_Type_or_List_type']=='reverse', ['Data_Type_or_List_type'] ] ='reverse_sorted'
# dataset.reset_index(drop=True)

# dataset.to_csv("dataset_concat.csv", index=False)

# Load the previously concatenated timing results from disk.
dataset = pd.read_csv("dataset_concat.csv")

# Sanity checks: the distinct input orderings present, then (rows, columns).
print(np.unique(dataset["Data_Type_or_List_type"]).tolist())
print(dataset.shape)

['random', 'reverse_sorted', 'sorted']
(2037, 6)


In [15]:
# Preview the first rows of the loaded timing table to check its columns.
dataset.head()

Unnamed: 0,Sort_Type,Data_Type_or_List_type,List_length,Runtimes,Number_of_repeatitions,Datetime
0,merge_sort,sorted,10,0.267837,7,2019-11-01 21:51:45.110982
1,merge_sort,sorted,10,0.277732,7,2019-11-01 21:51:45.110982
2,merge_sort,sorted,10,0.274984,7,2019-11-01 21:51:45.110982
3,merge_sort,sorted,10,0.250366,7,2019-11-01 21:51:45.110982
4,merge_sort,sorted,10,0.233188,7,2019-11-01 21:51:45.110982


In [16]:
# Aggregate the raw timings: for every combination of sort type, input
# ordering and list length, keep the smallest value of
# Runtimes / Number_of_repeatitions as Single_runtime.
# NOTE(review): presumably Runtimes is the total time for
# Number_of_repeatitions executions, making Single_runtime the time of one
# execution - confirm against the timing code that produced the CSVs.
min_query = """SELECT Sort_Type, 
Data_Type_or_List_type,
List_length, 
min( Runtimes/Number_of_repeatitions)  as Single_runtime
FROM dataset 
GROUP BY Sort_Type, 
Data_Type_or_List_type,
List_length """

# sqldf already returns a DataFrame for a SELECT, so no extra wrapping is needed.
df_min = ps.sqldf(min_query)

# Sanity check: the input orderings that survived the aggregation.
print(np.unique(df_min['Data_Type_or_List_type']).tolist())

['random', 'reverse_sorted', 'sorted']


In [9]:
# Show that the measured runtimes grow like n*log2(n).
# The constants are tuned by trial and error.
# The time is plotted in microseconds.
def plot_minimum_times(input_type='sorted', lower_limit=81920, upper_limit=10485760, c1=0.001, c2=0.5):
    """Plot minimum single-run times against a ``c1 * n * log2(n)`` reference.

    Reads the module-level ``df_min`` frame, keeps the rows for ``input_type``
    whose ``List_length`` lies in ``[lower_limit, upper_limit]`` and draws the
    runtime of each selected sort (currently only ``numpy_sort``) over
    ``log2(List_length)``, together with the reference curve.

    Parameters
    ----------
    input_type : str
        Value of ``Data_Type_or_List_type`` to plot.
    lower_limit, upper_limit : int
        Inclusive bounds on ``List_length``.
    c1 : float
        Constant multiplying ``n * log2(n)`` for the reference curve.
    c2 : float
        Constant for a second reference curve. That curve is currently not
        drawn, so the value has no effect; the parameter is kept so existing
        callers (including the ``interact`` sliders) keep working.

    The signature is used by ``ipywidgets.interact`` to build its controls,
    so parameters must not be added or renamed without updating that call.
    """
    in_range = (
        (df_min['Data_Type_or_List_type'] == input_type)
        & (df_min['List_length'] >= lower_limit)
        & (df_min['List_length'] <= upper_limit)
    )
    # Sort by length so every line is drawn left to right.
    plot_data = df_min[in_range].sort_values('List_length')

    # Only these sort types are compared against the reference curve.
    list_of_sorts = ['numpy_sort']  # , 'python_sorted']

    fig = new_figure()
    ax = fig.add_subplot(1, 1, 1)
    for sort_type in list_of_sorts:
        plot_this = plot_data[plot_data['Sort_Type'] == sort_type]
        # Single_runtime is scaled by 1e6 to match the microsecond axis label.
        ax.plot(np.log2(plot_this['List_length']), plot_this['Single_runtime'] * 1000000,
                '-o', alpha=0.7, label=sort_type)

    # Build the reference curve from the lengths that were actually plotted,
    # rather than from whatever the loop variable happened to hold last.
    plotted = plot_data[plot_data['Sort_Type'].isin(list_of_sorts)]
    lengths = np.unique(plotted['List_length'])
    n_log_n_small = c1 * lengths * np.log2(lengths)
    ax.plot(np.log2(lengths), n_log_n_small, '-x', label="c1= " + str(c1))
    # A second curve, c2 * n * log2(n), can be drawn the same way if needed.

    ax.set_xlabel('Log 2 of the size of the numeric array')
    ax.set_ylabel('Time in microseconds')
    ax.set_title("Runtimes of sort Algorithms for -" + input_type + " data")
    ax.legend()
    fig.tight_layout()
    # fig.savefig("log_plots/For -"+input_type+" from-"+str(lower_limit)+" to-"+str(upper_limit)+".pdf", bbox_inches='tight')
    plt.show()

## Plots for all sort_types on a single plot, for a fixed permutation of data

In [10]:
from ipywidgets import interact  # ,interactive , fixed, interact_manual
# import ipywidgets as widgets

# Build interactive controls for plot_minimum_times:
#   - list arguments are offered as a choice of values,
#   - (min, max, step) tuples are offered as a numeric range.
# Both length bounds offer every list length present in df_min.
pickle_object = interact(
    plot_minimum_times,
    input_type=['random', 'reverse_sorted', 'sorted'],
    lower_limit=np.unique(df_min['List_length']).tolist(),
    upper_limit=np.unique(df_min['List_length']).tolist(),
    c1=(0.001, 0.009, 0.001),
    c2=(0.01, 0.09, 0.01),
)

interactive(children=(Dropdown(description='input_type', index=2, options=('random', 'reverse_sorted', 'sorted…

## Plots for all permutations of data, for a fixed sort type