In [1]:
# Example code for creating a figure of suitable size
# for inclusion in the term paper.

import os

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandasql as ps


# Font sizes and text rendering shared by every figure in this notebook.
plt.rcParams.update({
    'axes.titlesize': 15,
    'axes.labelsize': 14,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
    'legend.fontsize': 12,
    'text.usetex': False,
})

# Colour-blind-friendly palette.
# Source: https://gist.github.com/thriveth/8560036
CB_color_cycle = [
    '#377eb8', '#ff7f00', '#4daf4a',
    '#f781bf', '#a65628', '#984ea3',
    '#999999', '#e41a1c', '#dede00',
]

# Install the palette as the default colour cycle for plotted lines.
mpl.rcParams['axes.prop_cycle'] = mpl.cycler(color=CB_color_cycle)

In [2]:
# Consistent figure size for the term paper: nominal millimetres -> figure inches.
def new_figure(height=55, width=84):
    """Return a new matplotlib figure of nominal size ``width`` x ``height`` mm.

    The nominal millimetre values are divided by 10.16 rather than by the true
    25.4 mm per inch. Since 10.16 == 25.4 / 2.5, the figure is created 2.5
    times larger than its nominal size (the default 84 mm width becomes
    about 8.27 in, i.e. 210 mm).
    NOTE(review): presumably the enlargement keeps the 12-15 pt fonts set in
    rcParams in proportion once the figure is scaled down to its nominal
    width in the paper -- confirm before changing the divisor.

    Parameters
    ----------
    height : float, default 55
        Nominal figure height in millimetres.
    width : float, default 84
        Nominal figure width in millimetres.

    Returns
    -------
    The object returned by ``plt.figure``.
    """
    # 25.4 mm per inch divided by the 2.5x enlargement factor.
    mm_per_figure_inch = 10.16
    return plt.figure(figsize=(width / mm_per_figure_inch,
                               height / mm_per_figure_inch))

### Reading all the different CSVs produced by each run of the time_it function 

In [50]:
# path ="C:\\Users\\admin\\Desktop\\INF221\\termpaper01\\plots_and_data\\csvs\\_20191120\\"
# merge_sort_already_sorted= pd.read_csv(path+"merge_sort_sorted.csv")
# merge_sort_random= pd.read_csv(path+"merge_sort_random.csv")
# merge_sort_reverse_sorted= pd.read_csv(path+"merge_sort_reverse_sorted.csv")

# quick_sort_random= pd.read_csv(path+"quick_sort_random.csv")
# quick_sort_already_sorted= pd.read_csv(path+"quick_sort_sorted.csv")
# quick_sort_reverse_sorted= pd.read_csv(path+"quick_sort_reverse_sorted.csv")

# heap_sort_already_sorted= pd.read_csv(path+"heap_sort_sorted.csv")
# heap_sort_random= pd.read_csv(path+"heap_sort_random.csv")
# heap_sort_reverse_sorted= pd.read_csv(path+"heap_sort_reverse_sorted.csv")

# numpy_sort_already_sorted= pd.read_csv(path+"sort_sorted.csv")
# numpy_sort_random= pd.read_csv(path+"sort_random.csv")
# numpy_sort_reverse_sorted= pd.read_csv(path+"sort_reverse_sorted.csv")

# python_sorted_already_sorted= pd.read_csv(path+"sorted_sorted.csv")
# python_sorted_random= pd.read_csv(path+"sorted_random.csv")
# python_sorted_reverse_sorted= pd.read_csv(path+"sorted_reverse_sorted.csv")         


In [3]:
# dataset=pd.concat([merge_sort_already_sorted, merge_sort_random, merge_sort_reverse_sorted, 
#                    quick_sort_random,quick_sort_already_sorted,quick_sort_reverse_sorted,
#                    heap_sort_already_sorted, heap_sort_random, heap_sort_reverse_sorted,
#                    numpy_sort_already_sorted, numpy_sort_random, numpy_sort_reverse_sorted,
#                    python_sorted_already_sorted, python_sorted_random, python_sorted_reverse_sorted])


# dataset.loc[ dataset['Data_Type_or_List_type']=='already_sorted', ['Data_Type_or_List_type'] ] ='sorted'
# dataset.loc[ dataset['Data_Type_or_List_type']=='reverese_sorted', ['Data_Type_or_List_type'] ] ='reverse_sorted'
# dataset.loc[ dataset['Data_Type_or_List_type']=='reverse', ['Data_Type_or_List_type'] ] ='reverse_sorted'

# dataset.loc[ dataset['Sort_Type']=='sorted', ['Sort_Type'] ] ='python_sorted'
# dataset.loc[ dataset['Sort_Type']=='sort', ['Sort_Type'] ] ='numpy_sort'
# dataset.reset_index(drop=True)

# dataset.to_csv("dataset_concat.csv", index=False)

# Load the concatenated timing results, then print the distinct input
# orderings found in the file followed by its (rows, columns) shape.
dataset = pd.read_csv("dataset_concat.csv")
print(np.unique(dataset['Data_Type_or_List_type']).tolist(), dataset.shape, sep="\n")

['random', 'reverse_sorted', 'sorted']
(2205, 6)


In [4]:
# Show the last five rows of the loaded timing data.
dataset.tail(5)

Unnamed: 0,Sort_Type,Data_Type_or_List_type,List_length,Runtimes,Number_of_repeatitions,Datetime
2200,python_sorted,reverse_sorted,10485760,2.323417,10,2019-11-22 15:36:49.260087
2201,python_sorted,reverse_sorted,10485760,2.369249,10,2019-11-22 15:36:49.260087
2202,python_sorted,reverse_sorted,10485760,2.381871,10,2019-11-22 15:36:49.260087
2203,python_sorted,reverse_sorted,10485760,2.352574,10,2019-11-22 15:36:49.260087
2204,python_sorted,reverse_sorted,10485760,2.301563,10,2019-11-22 15:36:49.260087


In [5]:
# Aggregate the raw timings: keep the smallest recorded runtime for every
# (Sort_Type, Data_Type_or_List_type, List_length) combination.

min_query = """SELECT Sort_Type, 
Data_Type_or_List_type,
List_length, 
min( Runtimes)  as Single_runtime
FROM dataset 
GROUP BY Sort_Type, 
Data_Type_or_List_type,
List_length """

df_min = pd.DataFrame(ps.sqldf(min_query))

# Sanity check: the input orderings and sort algorithms present after aggregation.
print(np.unique(df_min['Data_Type_or_List_type']).tolist(),
      np.unique(df_min['Sort_Type']).tolist(),
      sep="\n")
df_min.head()

['random', 'reverse_sorted', 'sorted']
['heap_sort', 'merge_sort', 'numpy_sort', 'python_sorted', 'quick_sort']


Unnamed: 0,Sort_Type,Data_Type_or_List_type,List_length,Single_runtime
0,heap_sort,random,10,1.8e-05
1,heap_sort,random,20,4.9e-05
2,heap_sort,random,40,0.000128
3,heap_sort,random,80,0.000311
4,heap_sort,random,160,0.000733


In [54]:
# xticks_wanted = np.asarray( [10,20,40,80,160,320,640,1280,2560,5120
#               ,10240,20480,40960,81920,163840,327680,655360,1310720
#               ,2621440,5242880,10485760])
# y = xticks_wanted[ (xticks_wanted >= 5200) & (xticks_wanted <= 10485760)]
# y


In [61]:
# Empirical check that the measured runtimes grow like n*log2(n).
# The constants c1 and c2 are tuned by trial and error. They are divided by
# 1e9 before plotting, so they read as nanoseconds per n*log2(n) step while
# the axis itself is labelled in seconds.

def plot_minimum_times(input_type='random', lower_limit=81920, upper_limit=10485760, c1=3, c2=1000):
    """Plot the minimum runtime of every sort algorithm for one input ordering.

    Reads the module-level ``df_min`` frame, draws one log2-log2 line per
    ``Sort_Type`` plus two reference curves ``c * n * log2(n) / 1e9``, saves
    the figure as a PDF under ``plots/loglog`` and shows it.

    Parameters
    ----------
    input_type : str
        Value of ``Data_Type_or_List_type`` to select.
    lower_limit, upper_limit : int
        Inclusive bounds on ``List_length``.
    c1, c2 : float
        Constants of the two reference curves.

    Raises
    ------
    ValueError
        If no row of ``df_min`` matches the selection (for example when
        ``lower_limit`` is greater than ``upper_limit``).
    """
    selected = df_min[
        (df_min['Data_Type_or_List_type'] == input_type)
        & (df_min['List_length'] >= lower_limit)
        & (df_min['List_length'] <= upper_limit)
    ]
    if selected.empty:
        # Without this guard the reference curves had nothing to be drawn
        # against and the function failed with an unrelated NameError.
        raise ValueError(
            "no measurements for input_type={!r} with List_length in [{}, {}]".format(
                input_type, lower_limit, upper_limit))

    fig = new_figure()
    ax = fig.add_subplot(111)

    # `base=2` sets the logarithm base of both axes; it replaces the
    # `basex=`/`basey=` keywords that Matplotlib deprecated in 3.3 and
    # subsequently removed.
    for sort_type in sorted(np.unique(selected['Sort_Type']).tolist(), reverse=True):
        rows = selected[selected['Sort_Type'] == sort_type]
        ax.loglog(rows['List_length'], rows['Single_runtime'], '-o',
                  alpha=0.8, label=sort_type, base=2)

    # Reference curves over every list length in the selection, independent
    # of whichever algorithm happened to be plotted last.
    lengths = np.unique(selected['List_length'])
    n_log_n_small = (c1 / 1e9) * lengths * np.log2(lengths)
    n_log_n_large = (c2 / 1e9) * lengths * np.log2(lengths)
    ax.loglog(lengths, n_log_n_small, '-x', label="c1= " + str(c1), base=2)
    ax.loglog(lengths, n_log_n_large, '-x', label='c2= ' + str(c2), base=2)

    ax.set_xlabel('Length of list')
    ax.set_ylabel('Time in seconds')
    ax.legend()
    fig.tight_layout()

    # Build the path portably instead of embedding backslashes in a normal
    # string literal ("\l" and "\F" are invalid escape sequences), and make
    # sure the target directory exists before saving.
    out_dir = os.path.join("plots", "loglog")
    os.makedirs(out_dir, exist_ok=True)
    file_name = ("For -" + input_type + " from-" + str(lower_limit)
                 + " to-" + str(upper_limit) + ".pdf")
    fig.savefig(os.path.join(out_dir, file_name), bbox_inches='tight')
    plt.show()


## Plots for all sort_types on a single plot, for a fixed permutation of data

In [62]:
# Interactive controls for plot_minimum_times: input ordering, list-length
# window and the two reference-curve constants.
from ipywidgets import interact #,interactive , fixed, interact_manual

length_choices = np.unique(df_min['List_length']).tolist()
pickle_object = interact(
    plot_minimum_times,
    input_type=['random', 'reverse_sorted', 'sorted'],
    lower_limit=length_choices,
    upper_limit=length_choices,
    c1=(1, 5, 0.1),
    c2=(800, 1501, 50),
)

interactive(children=(Dropdown(description='input_type', options=('random', 'reverse_sorted', 'sorted'), value…

## Plots for all permutations of the data, for a fixed sort type

In [63]:
# List the sort algorithms present in the aggregated results.
sort_types_available = np.unique(df_min['Sort_Type']).tolist()
print(sort_types_available)

['heap_sort', 'merge_sort', 'numpy_sort', 'python_sorted', 'quick_sort']


In [64]:
# Compare how a single sort algorithm behaves on the different input orderings.
# Runtimes are plotted on log2-log2 axes with the time axis labelled in seconds.
def plot_minimum_times_for_each_sort(sort_type='merge_sort', lower_limit=81920, upper_limit=10485760):
    """Plot the minimum runtime of one sort algorithm for every input ordering.

    Reads the module-level ``df_min`` frame, draws one log2-log2 line per
    ``Data_Type_or_List_type``, saves the figure as a PDF under
    ``plots/loglog`` and shows it.

    Parameters
    ----------
    sort_type : str
        Value of ``Sort_Type`` to select.
    lower_limit, upper_limit : int
        Inclusive bounds on ``List_length``.

    Raises
    ------
    ValueError
        If no row of ``df_min`` matches the selection (for example when
        ``lower_limit`` is greater than ``upper_limit``); previously an empty
        figure was drawn and saved in that case.
    """
    selected = df_min[
        (df_min['Sort_Type'] == sort_type)
        & (df_min['List_length'] >= lower_limit)
        & (df_min['List_length'] <= upper_limit)
    ]
    if selected.empty:
        raise ValueError(
            "no measurements for sort_type={!r} with List_length in [{}, {}]".format(
                sort_type, lower_limit, upper_limit))

    fig = new_figure()
    ax = fig.add_subplot(111)

    # `base=2` sets the logarithm base of both axes; it replaces the
    # `basex=`/`basey=` keywords that Matplotlib deprecated in 3.3 and
    # subsequently removed.
    orderings = sorted(np.unique(selected['Data_Type_or_List_type']).tolist(), reverse=True)
    for permut in orderings:
        rows = selected[selected['Data_Type_or_List_type'] == permut]
        ax.loglog(rows['List_length'], rows['Single_runtime'], '-o',
                  label=permut, base=2)

    ax.set_xlabel('Length of list')
    ax.set_ylabel('Time in seconds')
    ax.legend()
    fig.tight_layout()

    # Build the path portably instead of embedding backslashes in a normal
    # string literal ("\l" and "\F" are invalid escape sequences), and make
    # sure the target directory exists before saving.
    out_dir = os.path.join("plots", "loglog")
    os.makedirs(out_dir, exist_ok=True)
    file_name = ("For -" + sort_type + " from-" + str(lower_limit)
                 + " to-" + str(upper_limit) + ".pdf")
    fig.savefig(os.path.join(out_dir, file_name), bbox_inches='tight')
    plt.show()

In [65]:
# Interactive controls for plot_minimum_times_for_each_sort: the algorithm
# and the list-length window.
length_choices = np.unique(df_min['List_length']).tolist()
pickle_object = interact(
    plot_minimum_times_for_each_sort,
    sort_type=['heap_sort', 'merge_sort', 'numpy_sort', 'python_sorted', 'quick_sort'],
    lower_limit=length_choices,
    upper_limit=length_choices,
)

interactive(children=(Dropdown(description='sort_type', index=1, options=('heap_sort', 'merge_sort', 'numpy_so…

In [16]:
# Minimum runtimes of numpy_sort on random input for lists of at least
# 40000 elements, read from the already-aggregated df_min.
# The aggregation query is not repeated here: it was identical to the one that
# built df_min earlier in the notebook, so re-running it only redefined
# min_query and df_min with the same content.
df_min[
    (df_min['Sort_Type'] == 'numpy_sort')
    & (df_min['List_length'] >= 40000)
    & (df_min['Data_Type_or_List_type'] == 'random')
]

Unnamed: 0,Sort_Type,Data_Type_or_List_type,List_length,Single_runtime
138,numpy_sort,random,40960,0.005008
139,numpy_sort,random,81920,0.010742
140,numpy_sort,random,163840,0.022419
141,numpy_sort,random,327680,0.048081
142,numpy_sort,random,655360,0.099308
143,numpy_sort,random,1310720,0.206241
144,numpy_sort,random,2621440,0.424289
145,numpy_sort,random,5242880,0.871039
146,numpy_sort,random,10485760,1.784178
