# MSR 2018 Challenge Notebook
This notebook contains the analysis for Tyson Bulmer's MSR 2018 Challenge paper, using the supplied dataset.

In [None]:
import psycopg2
import psycopg2.extras
import pandas as pd
import numpy as np

# To deal with right skewness we can take the log of the values
import math

import matplotlib.pyplot as plt

# Configure display of dataframe visualizations.
# The pandas `display.mpl_style` option was deprecated in pandas 0.18 and later
# removed, so assigning to it fails on current pandas releases. The deprecation
# notice pointed to matplotlib style sheets instead; 'ggplot' is the closest
# equivalent of the old 'default' pandas style.
plt.style.use('ggplot')

## Connect to database and get event data

In [None]:
# Open a connection to the 'msr2018' database on localhost as user 'Tyson'
# (empty password). NOTE(review): connection settings are hard-coded here.
conn = psycopg2.connect("dbname='msr2018' user='Tyson' host='localhost' password=''")

In [None]:
# Cursor used to run the event query and read back its rows
cur = conn.cursor()

In [None]:
# Select the event type, session id, trigger time and the per-event detail
# columns from the `events` table (no filtering is done in SQL)
cur.execute("""select event_type, idesessionuuid, triggeredat, commandid, cancelled, terminatedstate, action, typeofnavigation, wasaborted from events""")

In [None]:
# The first field of each cursor description entry is the column name
colnames = [column[0] for column in cur.description]

In [None]:
# Pull every row of the query result into memory at once
rows = cur.fetchall()

In [None]:
# Build the events frame: one row per fetched record, labelled with the
# column names reported by the cursor
results = pd.DataFrame(list(map(list, rows)), columns=colnames)

In [None]:
# Shorten each event type string: keep the text before the first ',' and
# drop its first four dot-separated components
results['event_type'] = results['event_type'].apply(
    lambda qualified: '.'.join(qualified.partition(',')[0].split('.')[4:])
)

In [None]:
# Keep only the rows whose event type is one of the applicable events,
# then replace missing values with empty strings
events_to_use = [
    'CommandEvent',
    'NavigationEvent',
    'VisualStudio.WindowEvent',
    'CompletionEvents.CompletionEvent',
    'VisualStudio.EditEvent',
    'VisualStudio.DocumentEvent',
    'VisualStudio.DebuggerEvent',
    'VisualStudio.SolutionEvent',
    'VisualStudio.IDEStateEvent',
    'VisualStudio.BuildEvent',
    'TestRunEvents.TestRunEvent',
    'VersionControlEvents.VersionControlEvent',
    'VisualStudio.FindEvent',
]
results = results.loc[results['event_type'].isin(events_to_use)].fillna('')

In [None]:
# Shorten each command id: keep the text after the last ':' and, of that,
# only the final two dot-separated components
results['commandid'] = results['commandid'].apply(
    lambda raw_id: '.'.join(raw_id.rpartition(':')[2].split('.')[-2:])
)

In [None]:
# Order the events by ascending `triggeredat`, then display the frame
results = results.sort_values(by='triggeredat')
results

In [None]:
# Maps an event type to the column whose value is appended to that event
# type to form its "complete" label. Event types absent from this mapping
# keep their plain name.
d = {
    'CommandEvent': 'commandid',
    'CompletionEvents.CompletionEvent': 'terminatedstate',
    'VisualStudio.DocumentEvent': 'action',
    'VisualStudio.FindEvent': 'cancelled',
    'VisualStudio.SolutionEvent': 'action',
    'VisualStudio.WindowEvent': 'action',
    'NavigationEvent': 'typeofnavigation',
    'TestRunEvents.TestRunEvent': 'wasaborted',
}


def complete_events(x):
    """Return the event type of row *x*, refined with its detail value.

    Args:
        x: A mapping-like row (e.g. a DataFrame row) that has an
            'event_type' entry and, ideally, the detail column that `d`
            assigns to that event type.

    Returns:
        '<event_type>-<detail>' when `d` maps the event type to a column
        present in the row (the detail value is converted with `str`);
        otherwise the unchanged event type.

    Raises:
        KeyError: If the row has no 'event_type' entry.
    """
    event_type = x['event_type']
    try:
        # Only the lookups are guarded: a missing mapping or missing column
        # means "no detail available". Any other error is a real problem and
        # must surface rather than be silently swallowed.
        post_fix = str(x[d[event_type]])
    except KeyError:
        return event_type
    return event_type + '-' + post_fix
    
# Label every row with its refined event type (function applied row by row)
results['event_type_complete'] = results.apply(complete_events, axis='columns')

In [None]:
# Display the events frame for inspection
results

In [None]:
# For every CommandEvent in a session, emit one row pairing the
# space-joined labels of all earlier events in that session with the
# command's own label. A command that opens its session has no history
# and is skipped.
command_rows = []
for _, session in results.groupby('idesessionuuid', as_index=False):
    labels = session['event_type_complete'].tolist()
    kinds = session['event_type'].tolist()

    for position, kind in enumerate(kinds):
        if kind != "CommandEvent" or position == 0:
            continue
        command_rows.append([' '.join(labels[:position]), labels[position]])

data = pd.DataFrame(command_rows, columns=['events', 'command'])

In [None]:
# Display the (events, command) frame for inspection
data