# Import libraries

In [None]:
import os, glob, csv, sys, re, operator
from os import path
import pandas as pd
import plotly.graph_objs as go
from plotly.offline import plot, init_notebook_mode, iplot

%matplotlib inline

# Configure repository and directories

In [None]:
# Read the project name (line 1) and the bug-ID key (line 2) from the identity
# file, then derive the per-project analysis directory used by the cells below.
userhome = os.path.expanduser('~')
# The context manager closes the handle as soon as the content has been read;
# the original left the file open for the lifetime of the kernel.
with open(userhome + r"/DifferentDiffAlgorithms/SZZ/code_document/project_identity.txt", "r") as txt_file:
    pid = txt_file.read().split('\n')
project = pid[0]
bugidentifier = pid[1]
proj = project.upper()  # upper-cased name used in summary tables and chart titles
analyze_dir = userhome + r'/DifferentDiffAlgorithms/SZZ/projects_analyses/' + project + '/'

print ("Project name = %s" % project)
print ("Project key = %s" % bugidentifier)

# Find valid bug-related lines

In [None]:
# Diff algorithms compared in this notebook; the names are also used as
# prefixes in the input/output CSV file names.
algorithms = [
    'myers',
    'histogram',
]

In [None]:
# Load the per-file "affected files" tables produced for each diff algorithm.
affected_dir = analyze_dir + "04_affected_versions/02_grouping_affected_non-affected_files/"
dt_myers = pd.read_csv(affected_dir + "myers_affected_files.csv")
dt_histogram = pd.read_csv(affected_dir + "histogram_affected_files.csv")

# Display the Myers table as the cell output.
dt_myers

In [None]:
# Total number of valid bug-related lines per algorithm, written to a small
# summary CSV and read back as a DataFrame (`bg`) for the final merge.
bugline_myers = dt_myers['myers_#validbugline'].sum()
bugline_histogram = dt_histogram['histogram_#validbugline'].sum()
validbugline = [[proj,'Myers',bugline_myers],[proj,'Histogram',bugline_histogram]]
cols = ['project_name','algorithm','#valid_bug-related_line']
bugline_csv = analyze_dir + '05_validation/01_valid_buggyline/number_of_valid_bug-related_line.csv'
# newline='' lets the csv module control line endings itself; without it an
# extra blank row is written after every record on platforms that translate
# '\n' to '\r\n'.
with open(bugline_csv, 'w', newline='') as csvfile:
    writers = csv.writer(csvfile, delimiter=',')
    writers.writerow(cols)
    writers.writerows(validbugline)
bg = pd.read_csv(bugline_csv)
bg

# Find valid files

In [None]:
# Keep only the Myers-related columns and export them as the list of valid files.
mycols = [
    'bug_id',
    'bugfix_commitID',
    'parent_id',
    'filepath',
    'diff_myers_file',
    'blame_myers_file',
    'myers_diffnblame_file',
    'myers_#validbugline',
    'myers_affect_version?',
]
myersvalidfiles = dt_myers[mycols]
myers_valid_csv = analyze_dir + '05_validation/02_validfiles/myers_valid_files.csv'
myersvalidfiles.to_csv(myers_valid_csv, index=False)

In [None]:
# Keep only the Histogram-related columns and export them as the list of valid files.
histcols = [
    'bug_id',
    'bugfix_commitID',
    'parent_id',
    'filepath',
    'diff_histogram_file',
    'blame_histogram_file',
    'histogram_diffnblame_file',
    'histogram_#validbugline',
    'histogram_affect_version?',
]
histogramvalidfiles = dt_histogram[histcols]
histogram_valid_csv = analyze_dir + '05_validation/02_validfiles/histogram_valid_files.csv'
histogramvalidfiles.to_csv(histogram_valid_csv, index=False)

In [None]:
# Number of valid files (rows of each valid-files table) per algorithm, written
# to a summary CSV and read back as a DataFrame (`vf`) for the final merge.
validfile = [[proj,'Myers',len(myersvalidfiles)],[proj,'Histogram',len(histogramvalidfiles)]]
cols = ['project_name','algorithm','#valid_file']
validfile_csv = analyze_dir + '05_validation/02_validfiles/number_of_valid_files.csv'
# newline='' lets the csv module control line endings itself; without it an
# extra blank row is written after every record on platforms that translate
# '\n' to '\r\n'.
with open(validfile_csv, 'w', newline='') as csvfile:
    writers = csv.writer(csvfile, delimiter=',')
    writers.writerow(cols)
    writers.writerows(validfile)
vf = pd.read_csv(validfile_csv)
vf

# Find the valid bug-fix commit id

Myers

In [None]:
# One row per bug-fix commit with the summed number of valid bug lines (Myers).
myers_by_bugfix = dt_myers.groupby('bugfix_commitID', as_index=False)
df_bugfixcid_myers = myers_by_bugfix.agg({"myers_#validbugline":"sum"})
# NOTE(review): unlike the other exports in this notebook, index=False is not
# passed here, so the row index is written as an extra first column. Confirm
# whether downstream readers rely on that before changing it.
myers_bugfix_csv = analyze_dir + '05_validation/03_validbugfixcommitid/myers_valid_bugfix_commitid.csv'
df_bugfixcid_myers.to_csv(myers_bugfix_csv)
df_bugfixcid_myers

Histogram

In [None]:
# One row per bug-fix commit with the summed number of valid bug lines (Histogram).
histogram_by_bugfix = dt_histogram.groupby('bugfix_commitID', as_index=False)
df_bugfixcid_hist = histogram_by_bugfix.agg({"histogram_#validbugline":"sum"})
# NOTE(review): the file name says "bugintro" although the frame is grouped by
# bug-fix commit and the Myers counterpart is named "..._valid_bugfix_commitid.csv".
# It looks like a naming slip; confirm that nothing else reads this path before
# renaming. The row index is also written because index=False is not passed.
histogram_bugfix_csv = analyze_dir + '05_validation/03_validbugfixcommitid/histogram_valid_bugintro_commitid.csv'
df_bugfixcid_hist.to_csv(histogram_bugfix_csv)
df_bugfixcid_hist

In [None]:
# Number of distinct bug-fix commits per algorithm, written to a summary CSV
# and read back as a DataFrame (`vbf`) for the final merge.
validbugfix = [[proj,'Myers',len(df_bugfixcid_myers)],[proj,'Histogram',len(df_bugfixcid_hist)]]
cols = ['project_name','algorithm','#valid_bugfix_commitid']
validbugfix_csv = analyze_dir + '05_validation/03_validbugfixcommitid/number_of_valid_bugfixcommitid.csv'
# newline='' lets the csv module control line endings itself; without it an
# extra blank row is written after every record on platforms that translate
# '\n' to '\r\n'.
with open(validbugfix_csv, 'w', newline='') as csvfile:
    writers = csv.writer(csvfile, delimiter=',')
    writers.writerow(cols)
    writers.writerows(validbugfix)
vbf = pd.read_csv(validbugfix_csv)
vbf

# Find valid bug-introducing change commit id

In [None]:
# Collect the sub-folder names found under the diff/blame comparison directory.
find_dirs = analyze_dir + '03_annotate/02_diff-file_blame-file_comparison/'
# Every directory visited by os.walk contributes its (non-empty) list of
# sub-folder names; the lists are then sorted and the first one is kept.
subfolder_lists = [folder for root, folder, fileList in os.walk(find_dirs) if folder != []]
# NOTE(review): the order of names inside the kept list comes from os.walk and
# is later indexed by position to pair folders with `algorithms`. Confirm that
# the folder order on disk matches ['myers', 'histogram'].
dirs = sorted(subfolder_lists)[0]
dirs

In [None]:
# Column labels for the diff/blame comparison files, plus the algorithm list
# that drives the extraction loop.
column_name = [
    'bug_id',
    'bugintro_commitid',
    'line_number',
    'buggy_code',
    'does_bugline_from_originfile?',
    'date_added_in_originfile',
    'earliest_affected_version',
    'version_release_date',
    'bug_class',
]
algorithms = [
    'myers',
    'histogram',
]

In [None]:
# Datasets listed in the same order as `algorithms`, so dt[i] belongs to algorithms[i].
dt = [
    dt_myers,
    dt_histogram,
]

In [None]:
# For every algorithm, open the diff/blame comparison CSV of each affected file,
# keep the rows classified as 'bug-introducing change', and write them to
# "<algorithm>_valid_bugintro_commitid.csv".
for alg, algorithm in enumerate(algorithms):
    result = []
    dataset = dt[alg]
    for n in range(len(dataset)):
        sys.stdout.write("\r{} Reading file no: {}".format((alg+1),(n+1)))
        sys.stdout.flush()

        # The row is a Series labelled by column name, so positional access goes
        # through .iloc; a plain integer key (dx[6]) is deprecated for such a
        # Series in recent pandas releases.
        dx = dataset.iloc[n]
        comparison_path = find_dirs + dirs[alg] + "/" + dx.iloc[6]

        # Every matching line counts as one; the '#deletion' column is summed
        # per commit in a later cell.
        numbug = 1
        # newline='' is the mode the csv module expects, so that quoted fields
        # containing line breaks are parsed correctly.
        with open(comparison_path, 'r', newline='') as myfile:
            csv_reader = csv.reader(myfile, delimiter=',')
            for row in csv_reader:
                # Skip blank or truncated rows instead of failing on row[8].
                if len(row) > 8 and row[8] == 'bug-introducing change':
                    result.append([dx.iloc[0], dx.iloc[1], dx.iloc[2], dx.iloc[3], dx.iloc[6],
                                   row[1], row[8], numbug])

    output_path = analyze_dir + "05_validation/04_validbugintrocommitid/" + algorithm + "_valid_bugintro_commitid.csv"
    with open(output_path, 'w', newline='') as csvfile:
        # Headers for the values collected above, in the same order.
        fields = ['bug_id','bugfix_commitID', 'parent_id','filepath','diffnblame_file','bugintro_commitid',
                  'bug_class','#deletion']
        writers = csv.writer(csvfile, delimiter=",")
        writers.writerow(fields)
        writers.writerows(result)

print ("\nThe process is complete")

In [None]:
# Count the bug-introducing lines per bug-introducing commit (Myers).
myers_bugintro_csv = analyze_dir + "05_validation/04_validbugintrocommitid/myers_valid_bugintro_commitid.csv"
bicm = pd.read_csv(myers_bugintro_csv)
myers_by_bugintro = bicm.groupby('bugintro_commitid', as_index=False)
bugintromyers = myers_by_bugintro.agg({"#deletion":"sum"})
bugintromyers

In [None]:
# Count the bug-introducing lines per bug-introducing commit (Histogram).
histogram_bugintro_csv = analyze_dir + "05_validation/04_validbugintrocommitid/histogram_valid_bugintro_commitid.csv"
bich = pd.read_csv(histogram_bugintro_csv)
histogram_by_bugintro = bich.groupby('bugintro_commitid', as_index=False)
bugintrohist = histogram_by_bugintro.agg({"#deletion":"sum"})
bugintrohist

In [None]:
# Number of distinct bug-introducing commits per algorithm, written to a summary
# CSV and read back as a DataFrame (`vbi`) for the final merge.
validbugintro = [[proj,'Myers',len(bugintromyers)],[proj,'Histogram',len(bugintrohist)]]
cols = ['project_name','algorithm','#valid_bugintro_commitid']
validbugintro_csv = analyze_dir + '05_validation/04_validbugintrocommitid/number_of_valid_bugintrocommitid.csv'
# newline='' lets the csv module control line endings itself; without it an
# extra blank row is written after every record on platforms that translate
# '\n' to '\r\n'.
with open(validbugintro_csv, 'w', newline='') as csvfile:
    writers = csv.writer(csvfile, delimiter=',')
    writers.writerow(cols)
    writers.writerows(validbugintro)
vbi = pd.read_csv(validbugintro_csv)
vbi

# Summarize the results

In [None]:
# Join the four per-algorithm summaries on (project_name, algorithm) and save
# the combined table.
merge_keys = ['project_name','algorithm']
df_merge1 = pd.merge(bg, vf, on=merge_keys)
df_merge2 = pd.merge(df_merge1, vbf, on=merge_keys)
df_merge3 = pd.merge(df_merge2, vbi, on=merge_keys)
summary_csv = analyze_dir + "05_validation/05_stats/summary_of_validation.csv"
df_merge3.to_csv(summary_csv, index=False)

In [None]:
# Reload the saved summary; the charts below read their values from this frame.
summary_path = analyze_dir + "05_validation/05_stats/summary_of_validation.csv"
data_stat = pd.read_csv(summary_path)
data_stat

# Visualizing number of validated buglines in graphs

In [None]:
# Bar chart of the number of valid bug-related lines (third column of the
# summary) for the first two summary rows, labelled Myers and Histogram.
# iloc[row, col] is used because chaining data_stat.iloc[row][col] performs an
# integer lookup on a name-labelled Series, which recent pandas releases deprecate.
bugline_counts = [data_stat.iloc[0, 2], data_stat.iloc[1, 2]]

hist = go.Bar(
    x = ['Myers','Histogram'],
    y = bugline_counts,
    text = bugline_counts,
    textposition = 'auto',
    marker = dict(
        color = 'rgb(150,255,200)',
        line = dict(
            color='rgb(8,48,107)',
            width=1.5),
    ),
    opacity=0.6
)

data = [hist]
layout = go.Layout(
    title = "Number of validated buglines in " + proj + " Project"
)

init_notebook_mode(connected=True)
fig = go.Figure(data=data, layout=layout)
iplot(fig, show_link=False)

# Visualizing number of validated files, bug-intro and bug-fix commit in graphs

In [None]:
# Grouped bar chart comparing, per algorithm, the number of valid files,
# bug-fix commits and bug-introducing commits (summary columns 3, 4 and 5).
# iloc[row, col] is used because chaining data_stat.iloc[row][col] performs an
# integer lookup on a name-labelled Series, which recent pandas releases deprecate.
categories = ['#valid_file','#valid_bugfix_commitid','#valid_bugintro_commitid']
myers_counts = [data_stat.iloc[0, col] for col in (3, 4, 5)]
histogram_counts = [data_stat.iloc[1, col] for col in (3, 4, 5)]

nummyers = go.Bar(
    x = categories,
    y = myers_counts,
    text = myers_counts,
    textposition = 'auto',
    name = 'Myers'
)


numhist = go.Bar(
    x = categories,
    y = histogram_counts,
    text = histogram_counts,
    textposition = 'auto',
    name = 'Histogram'
)

data = [nummyers,numhist]
layout = go.Layout(
    title = "Number of valid files, bug-intro commit id and bugfix commit id in " + proj + " Project"
)

init_notebook_mode(connected=True)
fig = go.Figure(data=data, layout=layout)
iplot(fig, show_link=False)