# Importing libraries

In [None]:
import os, sys, csv
from os import path
import pandas as pd
import plotly.graph_objs as go
from plotly.offline import plot, init_notebook_mode, iplot

%matplotlib inline

# Defining directories

In [None]:
# Home directory of the current user; every path below hangs off it.
userhome = os.path.expanduser('~')
# Root folder of the Crate.io analysis. The trailing slash is kept so that
# relative file names can be appended directly with `+`.
analyze_dir = ''.join(
    [userhome, r'/different-diff/analyze/collect_metrics/analyze_crate.io/']
)

# Load merged dataset

In [None]:
# Location of the merged per-algorithm dataset, relative to the analysis root.
merged_csv_path = analyze_dir + '04_merge_dataframe/datamerge_all_algorithms.csv'
datamerge = pd.read_csv(merged_csv_path)
# Bare expression: the notebook renders the dataframe as the cell output.
datamerge

# Separating columns of dataset

In [None]:
# Defining columns: the file name plus one count column per diff algorithm
# (histogram, minimal, myers, patience).
ins_field = [
    'filename',
    '#insertions_histogram',
    '#insertions_minimal',
    '#insertions_myers',
    '#insertions_patience',
]
del_field = [
    'filename',
    '#deletions_histogram',
    '#deletions_minimal',
    '#deletions_myers',
    '#deletions_patience',
]

In [None]:
# Dataframe for insertions: the columns listed in ins_field, taken from the
# merged dataset.
insertions_csv = analyze_dir + '05_separating_files/number_of_insertions_file.csv'
df_ins = datamerge[ins_field]
# Persist the subset, then load it back from disk.
# NOTE(review): to_csv is called without index=False, so the row index is
# written as an extra column; re-selecting ins_field after the read drops it
# from df_insert again. Confirm whether the stored file should carry it.
df_ins.to_csv(insertions_csv)
df_insert = pd.read_csv(insertions_csv)[ins_field]
df_insert

In [None]:
# Dataframe for deletions: the columns listed in del_field, taken from the
# merged dataset.
deletions_csv = analyze_dir + '05_separating_files/number_of_deletions_file.csv'
df_del = datamerge[del_field]
# Persist the subset, then load it back from disk.
# NOTE(review): to_csv is called without index=False, so the row index is
# written as an extra column; re-selecting del_field after the read drops it
# from df_delete again. Confirm whether the stored file should carry it.
df_del.to_csv(deletions_csv)
df_delete = pd.read_csv(deletions_csv)[del_field]
df_delete

# Capturing files whose number of insertions differs between algorithms

In [None]:
# Keep only the rows whose last four columns hold more than one distinct value.
ins_counts = df_insert.iloc[:, -4:]
ins_differs = ins_counts.nunique(axis=1) > 1
df_diffins = df_insert[ins_differs]
# Stored without the row index.
df_diffins.to_csv(
    analyze_dir + '05_separating_files/different_insertions.csv',
    index=False,
)
df_diffins

# Capturing files whose number of deletions differs between algorithms

In [None]:
# Keep only the rows whose last four columns hold more than one distinct value.
del_counts = df_delete.iloc[:, -4:]
del_differs = del_counts.nunique(axis=1) > 1
df_diffdel = df_delete[del_differs]
# Stored without the row index.
df_diffdel.to_csv(
    analyze_dir + '05_separating_files/different_deletions.csv',
    index=False,
)
df_diffdel

# Percentage of files with a differing number of insertions or deletions

Percentage of files with a different number of insertions

In [None]:
# Share (in percent) of rows in df_insert that also appear in df_diffins,
# and the complementary share.
n_diff_ins = len(df_diffins)
n_all_ins = len(df_insert)
ins_percent = (n_diff_ins / n_all_ins) * 100
rest_ins = 100 - ins_percent
print("{0:.2f}%".format(ins_percent))

In [None]:
# Pie chart of the two insertion shares computed above.
labels = [
    'different number of insertions',
    'same number of insertions',
]
values = [ins_percent, rest_ins]
colors = ['#FF396C', '#96D38C']

# Slice styling: black 2px outline around each slice, 15pt text.
slice_outline = dict(color='#000000', width=2)
slice_marker = dict(colors=colors, line=slice_outline)
slice_font = dict(size=15)

# Hover shows label and percent; the text drawn on each slice is the raw value.
trace = go.Pie(
    labels=labels,
    values=values,
    hoverinfo='label+percent',
    textinfo='value',
    textfont=slice_font,
    marker=slice_marker,
)

layout = go.Layout(
    title="Percentage of files having different number of insertions in Crate.io Project",
)
data = [trace]

init_notebook_mode(connected=True)
fig = go.Figure(data=data, layout=layout)
iplot(fig, show_link=False)

Percentage of files with a different number of deletions

In [None]:
# Share (in percent) of rows in df_delete that also appear in df_diffdel,
# and the complementary share.
n_diff_del = len(df_diffdel)
n_all_del = len(df_delete)
del_percent = (n_diff_del / n_all_del) * 100
rest_del = 100 - del_percent
print("{0:.2f}%".format(del_percent))

In [None]:
# Pie chart of the two deletion shares computed above.
labels = [
    'different number of deletions',
    'same number of deletions',
]
values = [del_percent, rest_del]
colors = ['#FF396C', '#96D38C']

# Slice styling: black 2px outline around each slice, 15pt text.
slice_outline = dict(color='#000000', width=2)
slice_marker = dict(colors=colors, line=slice_outline)
slice_font = dict(size=15)

# Hover shows label and percent; the text drawn on each slice is the raw value.
trace = go.Pie(
    labels=labels,
    values=values,
    hoverinfo='label+percent',
    textinfo='value',
    textfont=slice_font,
    marker=slice_marker,
)

layout = go.Layout(
    title="Percentage of files having different number of deletions in Crate.io Project",
)
data = [trace]

init_notebook_mode(connected=True)
fig = go.Figure(data=data, layout=layout)
iplot(fig, show_link=False)

In [None]:
# Bar chart putting the insertion and deletion percentages side by side.
bar_percentages = [ins_percent, del_percent]

# Bar styling: light green fill with a thin black outline.
bar_outline = dict(color='#000000', width=1.5)
bar_marker = dict(color='rgb(100,255,150)', line=bar_outline)

# The same percentages are used for the bar heights and the bar text.
trace = go.Bar(
    x=['insertions', 'deletions'],
    y=bar_percentages,
    text=list(bar_percentages),
    textposition='auto',
    marker=bar_marker,
    opacity=0.6,
)

layout = go.Layout(
    title='Percentage of files having different number in Crate.io Project',
)
data = [trace]

init_notebook_mode(connected=True)
fig = go.Figure(data=data, layout=layout)
iplot(fig, show_link=False)