In [14]:
import matplotlib.pyplot as plt
import pandas as pd
import sys
sys.path.append('../utils')
import database as db

### safe/unsafe in commit locality

In [None]:
df_data = pd.read_sql("SELECT cve_id, hash, num_lines_added, num_lines_deleted,num_files FROM ext_commits", con=db.conn)
df_data.drop_duplicates(subset=['cve_id', 'hash'], keep='first', inplace=True)
df_data['num_lines_added'] = df_data['num_lines_added'].apply(lambda x: int(x))
df_data['num_lines_deleted'] = df_data['num_lines_deleted'].apply(lambda x: int(x))
df_data['num_files'] = df_data['num_files'].apply(lambda x: int(x))
def print_statistics(col):
    print(f"# average {col}: " + str(df_data[col].sum()/len(df_data)))
    print(f"# median {col}: " + str(df_data[col].median()))
    print(f"# minimum {col}: " + str(df_data[col].min()))
    print(f"# maximum {col}: " + str(df_data[col].max()))
    print(f"# total {col}: " + str(df_data[col].sum()))
    print(f"std of {col}: " + str(df_data[col].std()))
    print("==================================")
print_statistics('num_lines_added')
print_statistics('num_lines_deleted')
print_statistics('num_files')

df_data = pd.read_sql("SELECT * FROM vul_safe_unsafe", con=db.conn)
df_data["unsafe_trait"] = float('nan')
# df_manual = pd.read_sql("SELECT * FROM vul_safe_unsafe_manual2", con=db.conn)
# df = df.append(df_manual, ignore_index=True)

df_data['safe_func_fix'] = df_data.apply(lambda x: max(float(x.safe_func_fix), float(x.safe_func)), axis=1)
df_data['unsafe_func_fix'] = df_data.apply(lambda x: max(float(x.unsafe_func_fix), float(x.unsafe_func)), axis=1)
df_data['unsafe_block_fix'] = df_data.apply(lambda x: max(float(x.unsafe_block_fix), float(x.unsafe_block)), axis=1)
print_statistics('safe_func_fix')
print_statistics('unsafe_func_fix')
print_statistics('unsafe_block_fix')

# average num_lines_added: 838.54806312769
# median num_lines_added: 34.0
# minimum num_lines_added: 0
# maximum num_lines_added: 153037
# total num_lines_added: 584468
std of num_lines_added: 10069.50755500708
# average num_lines_deleted: 881.2568149210904
# median num_lines_deleted: 8.0
# minimum num_lines_deleted: 0
# maximum num_lines_deleted: 178697
# total num_lines_deleted: 614236
std of num_lines_deleted: 11809.66251385429
# average num_files: 4.084648493543759
# median num_files: 2.0
# minimum num_files: 1
# maximum num_files: 363
# total num_files: 2847
std of num_files: 15.177304864769708
# average safe_func_fix: 31.55855855855856
# median safe_func_fix: 4.0
# minimum safe_func_fix: 0.0
# maximum safe_func_fix: 2996.0
# total safe_func_fix: 10509.0
std of safe_func_fix: 211.07804263983257
# average unsafe_func_fix: 0.7477477477477478
# median unsafe_func_fix: 0.0
# minimum unsafe_func_fix: 0.0
# maximum unsafe_func_fix: 66.0
# total unsafe_func_fix: 249.0
std of unsafe_func_

### safe/unsafe in commit locality across type

In [28]:
df_vul = pd.read_sql("SELECT id, package, sfp_id, published, severity FROM cve", con=db.conn)
df = pd.read_sql("SELECT * FROM vul_safe_unsafe", con=db.conn)
df["unsafe_trait"] = 0
# df_manual = pd.read_sql("SELECT * FROM vul_safe_unsafe_manual2", con=db.conn)
# df = df.append(df_manual, ignore_index=True)

df_files = pd.read_sql("SELECT cve_id, hash, num_files FROM ext_commits", con=db.conn)
df['safe_function'] = df.apply(lambda x: max(float(x.safe_func_fix), float(x.safe_func)), axis=1)
df['unsafe_function'] = df.apply(lambda x: max(float(x.unsafe_func_fix), float(x.unsafe_func)), axis=1)
df['unsafe_block'] = df.apply(lambda x: max(float(x.unsafe_block_fix), float(x.unsafe_block)), axis=1)
df['unsafe_trait'] = df['unsafe_trait'].apply(lambda x: float(x))
def get_cat(x, cat):
    temp = eval(df_vul[df_vul['id']==x]['sfp_id'].values[0]) if len(df_vul[df_vul['id']==x]['sfp_id'].values)>0 else []
    if cat in temp:
        return True
    return False
cats = ['Memory Access', 'Memory Management', 'Synchronization', 'Tainted Input', 'Resource Management', 'Exception Management', 'Path Resolution']
for cat in cats:
    df_data = df[df["cve_id"].apply(lambda x: get_cat(x, cat))]
    if df_data.empty:
        print(f"Category {cat} is empty")
        continue
    
    def print_statistics(col):
        print(f"# average {col}: " + str(df_data[col].sum()/len(df_data)))
        print(f"# median {col}: " + str(df_data[col].median()))
        print(f"# minimum {col}: " + str(df_data[col].min()))
        print(f"# maximum {col}: " + str(df_data[col].max()))
        print(f"# toatl {col}: " + str(df_data[col].sum()))
        print(f"std of {col}: " + str(df_data[col].std()))
        print("==================================")
    print("Commit Locality Across "+cat)
    print(f"Number of rows: {len(df_data)}")
    print_statistics('safe_function')
    print_statistics('unsafe_function')
    print_statistics('unsafe_block')
    print_statistics('unsafe_trait')
    print("\n")
    

Commit Locality Across Memory Access
Number of rows: 69
# average safe_function: 9.81159420289855
# median safe_function: 2.0
# minimum safe_function: 0.0
# maximum safe_function: 346.0
# toatl safe_function: 677.0
std of safe_function: 41.43750622830894
# average unsafe_function: 1.3333333333333333
# median unsafe_function: 0.0
# minimum unsafe_function: 0.0
# maximum unsafe_function: 66.0
# toatl unsafe_function: 92.0
std of unsafe_function: 7.966227734236918
# average unsafe_block: 3.420289855072464
# median unsafe_block: 1.0
# minimum unsafe_block: 0.0
# maximum unsafe_block: 132.0
# toatl unsafe_block: 236.0
std of unsafe_block: 15.844284362935802
# average unsafe_trait: 0.0
# median unsafe_trait: 0.0
# minimum unsafe_trait: 0.0
# maximum unsafe_trait: 0.0
# toatl unsafe_trait: 0.0
std of unsafe_trait: 0.0


Commit Locality Across Memory Management
Number of rows: 65
# average safe_function: 16.523076923076925
# median safe_function: 4.0
# minimum safe_function: 0.0
# maximum safe

### Fix patterns

In [32]:
df_auto = pd.read_sql("SELECT * FROM vul_safe_unsafe", con=db.conn)
# df_manual = pd.read_sql("SELECT * FROM vul_safe_unsafe_manual2", con=db.conn)
# df_data = df_auto.append(df_manual).reset_index()
df_data = df_auto

In [33]:
# vulnerability locality: safe function
df_safe = df_data
df_safe = df_safe[df_safe["safe_func"]!=0]
print(len(df_safe))
add = len(df_safe[df_safe.apply(lambda x: True if x.safe_func < x.safe_func_fix else False, axis=1)])
remove = len(df_safe[df_safe.apply(lambda x: True if x.safe_func > x.safe_func_fix else False, axis=1)])
modified = len(df_safe[df_safe.apply(lambda x: True if x.safe_func == x.safe_func_fix else False, axis=1)])
print(add)
print(remove)
print(modified)

333
70
58
205


In [34]:
df_unsafe = df_data
df_unsafe = df_unsafe[df_unsafe["unsafe_func"]!=0]
# df_unsafe = df_unsafe[df_unsafe["unsafe_block"]==0]
print(len(df_unsafe))
add = len(df_unsafe[df_unsafe.apply(lambda x: True if x.unsafe_func < x.unsafe_func_fix else False, axis=1)])
modified = len(df_unsafe[df_unsafe.apply(lambda x: True if x.unsafe_func ==x.unsafe_func_fix else False, axis=1)])
remove = len(df_unsafe[df_unsafe.apply(lambda x: True if x.unsafe_func > x.unsafe_func else False, axis=1)])
print(add)
print(remove)
print(modified)

333
14
0
314


In [35]:
df_unsafe = df_data
df_unsafe = df_unsafe[df_unsafe["unsafe_block"]!=0]
# df_unsafe = df_unsafe[df_unsafe["unsafe_block"]==0]
print(len(df_unsafe))
add = len(df_unsafe[df_unsafe.apply(lambda x: True if x.unsafe_block < x.unsafe_block_fix else False, axis=1)])
modified = len(df_unsafe[df_unsafe.apply(lambda x: True if x.unsafe_block == x.unsafe_block_fix else False, axis=1)])
remove = len(df_unsafe[df_unsafe.apply(lambda x: True if x.unsafe_block > x.unsafe_block_fix else False, axis=1)])
print(add)
print(remove)
print(modified)

333
25
35
273
