In [1]:
import hashlib
import json
import os
import subprocess as sp
from glob import glob

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from iterextras import par_for

# Apply seaborn's default theme so later matplotlib/seaborn figures share one style.
sns.set()

In [2]:
# Load every non-empty JSON file under ../data into one DataFrame, tagging
# each row with a `crate` column derived from the file's basename.
all_dfs = []
# Sort the paths: glob() returns matches in filesystem-dependent order, which
# would otherwise make the row order of `df` vary between machines and runs.
for f in sorted(glob('../data/*.json')):
    # Zero-byte files contain nothing to parse; skip them.
    if os.stat(f).st_size == 0:
        continue
    crate = os.path.splitext(os.path.basename(f))[0]
    # Context manager closes the handle as soon as parsing finishes
    # (a bare open() passed to read_json leaves the file open).
    with open(f) as json_file:
        crate_df = pd.read_json(json_file)
    crate_df['crate'] = crate
    all_dfs.append(crate_df)

# Fail with an actionable message instead of pandas' generic
# "No objects to concatenate" when nothing was loaded.
if not all_dfs:
    raise ValueError("no non-empty JSON files matched '../data/*.json'")
df = pd.concat(all_dfs)

In [4]:
def output_nlines(output):
    """Count the distinct source lines covered by a collection of line ranges.

    Parameters
    ----------
    output : iterable of mapping
        Each item must provide integer ``'start_line'`` and ``'end_line'``
        entries. Ranges are inclusive on both ends and may overlap.

    Returns
    -------
    int
        Number of unique line numbers covered by at least one range
        (0 for an empty ``output``).
    """
    # Build the set directly: overlapping ranges are deduplicated without the
    # quadratic list concatenation that ``sum(list_of_lists, [])`` performs.
    return len({
        line
        for rng in output
        for line in range(rng['start_line'], rng['end_line'] + 1)
    })
# Per-row count of distinct lines covered by the ranges in the `output` column.
df['nlines'] = df['output'].map(output_nlines)

def function_nlines(rng):
    """Return how many lines a single inclusive line range spans.

    Parameters
    ----------
    rng : mapping
        Must provide integer ``'start_line'`` and ``'end_line'`` entries.

    Returns
    -------
    int
        ``end_line - start_line + 1``, or 0 when the range is empty
        (``end_line`` below ``start_line``).
    """
    first_line = rng['start_line']
    last_line = rng['end_line']
    # +1 because the end line itself belongs to the range.
    span = range(first_line, last_line + 1)
    return len(span)
# Line span of the function range recorded on each row.
df['func_nlines'] = df['function_range'].map(function_nlines)

# Distance, in lines, from the start of the function range to the start of the slice.
df['slice_offset'] = df.apply(
    lambda row: row['slice']['start_line'] - row['function_range']['start_line'],
    axis=1,
)

In [6]:
def _stable_slice_key(s):
    """Map a slice dict to a signed 64-bit integer key that is stable across runs.

    The built-in ``hash()`` is salted per interpreter process for strings, so
    ``hash(frozenset(s.items()))`` yields different keys after every kernel
    restart. A digest of a canonical JSON encoding gives the same key for the
    same slice contents on every run.

    Parameters
    ----------
    s : dict
        Slice description; keys are sorted before encoding so insertion order
        does not affect the key.

    Returns
    -------
    int
        Value in the signed 64-bit range (the same range ``hash()`` produced).
    """
    # default=str keeps encoding from failing on values json cannot serialise natively.
    canonical = json.dumps(s, sort_keys=True, separators=(',', ':'), default=str)
    digest = hashlib.blake2b(canonical.encode('utf-8'), digest_size=8).digest()
    return int.from_bytes(digest, 'big', signed=True)


# Identifier shared by all rows that describe the same slice.
df['slice_key'] = df.slice.map(_stable_slice_key)
# Non-negative truncated key.
# NOTE(review): 0xfffffff keeps the low 28 bits although the column name says
# "32bit"; the mask is left unchanged here -- confirm the intended width.
df['slice_key_32bit'] = df.slice_key.map(lambda n: n & 0xfffffff)

In [7]:
from pymer4.models import Lmer

# Keep only rows flagged `contains_call_with_ref` whose slice starts more than
# 10 lines after the start of the function range.
df_big = df[df['contains_call_with_ref'] & (df['slice_offset'] > 10)]

# Columns referenced by the formula below; only these are handed to the model.
model_columns = ['nlines', 'context_mode', 'mutability_mode', 'pointer_mode', 'slice_offset', 'crate']
# Full interaction of the three modes with slice_offset, plus a random intercept per crate.
formula = 'nlines ~ context_mode * mutability_mode * pointer_mode * slice_offset + (1 | crate)'
model = Lmer(formula, data=df_big[model_columns])

# Levels of each categorical predictor, passed to fit() as `factors`.
factor_levels = {
    'mutability_mode': ['DistinguishMut', 'IgnoreMut'],
    'context_mode': ['Recurse', 'SigOnly'],
    'pointer_mode': ['Precise', 'Conservative'],
}
model.fit(factors=factor_levels)

Formula: nlines~context_mode*mutability_mode*pointer_mode*slice_offset+(1|crate)

Family: gaussian	 Inference: parametric

Number of observations: 49184	 Groups: {'crate': 7.0}

Log-likelihood: -234720.516 	 AIC: 469441.032

Random effects:

                 Name      Var     Std
crate     (Intercept)   30.053   5.482
Residual               816.579  28.576

No random effect correlations specified

Fixed effects:



Unnamed: 0,Estimate,2.5_ci,97.5_ci,SE,DF,T-stat,P-val,Sig
(Intercept),2.402,-1.867,6.671,2.178,6.725,1.103,0.308,
context_mode1,-1.913,-3.25,-0.577,0.682,49162.318,-2.806,0.005,**
mutability_mode1,-0.025,-1.361,1.312,0.682,49162.318,-0.036,0.971,
pointer_mode1,0.041,-1.296,1.377,0.682,49162.318,0.059,0.953,
slice_offset,0.2,0.186,0.214,0.007,49165.117,28.516,0.0,***
context_mode1:mutability_mode1,-1.436,-3.326,0.454,0.964,49162.318,-1.489,0.136,
context_mode1:pointer_mode1,-0.003,-1.894,1.887,0.964,49162.318,-0.004,0.997,
mutability_mode1:pointer_mode1,0.026,-1.864,1.916,0.964,49162.318,0.027,0.978,
context_mode1:slice_offset,0.095,0.076,0.114,0.01,49162.318,9.614,0.0,***
mutability_mode1:slice_offset,0.056,0.037,0.076,0.01,49162.318,5.697,0.0,***


In [8]:
# Post-hoc comparisons across the three mode factors; only the second element
# of the returned pair is kept in `t` and displayed as the cell output.
post_hoc_factors = ['context_mode', 'pointer_mode', 'mutability_mode']
_, t = model.post_hoc(marginal_vars=post_hoc_factors)
t

P-values adjusted by tukey method for family of 28 estimates


Unnamed: 0,Contrast,Estimate,2.5_ci,97.5_ci,SE,DF,Z-stat,P-val,Sig
0,Recurse Precise DistinguishMut - SigOnly Preci...,-2.379,-3.941,-0.817,0.515,inf,-4.615,0.0,***
1,Recurse Precise DistinguishMut - Recurse Conse...,-0.051,-1.613,1.512,0.515,inf,-0.098,1.0,
2,Recurse Precise DistinguishMut - SigOnly Conse...,-2.426,-3.988,-0.863,0.515,inf,-4.706,0.0,***
3,Recurse Precise DistinguishMut - Recurse Preci...,-2.519,-4.081,-0.957,0.515,inf,-4.887,0.0,***
4,Recurse Precise DistinguishMut - SigOnly Preci...,-4.745,-6.307,-3.183,0.515,inf,-9.207,0.0,***
5,Recurse Precise DistinguishMut - Recurse Conse...,-2.577,-4.139,-1.015,0.515,inf,-5.0,0.0,***
6,Recurse Precise DistinguishMut - SigOnly Conse...,-4.791,-6.353,-3.229,0.515,inf,-9.295,0.0,***
7,SigOnly Precise DistinguishMut - Recurse Conse...,2.328,0.766,3.89,0.515,inf,4.517,0.0,***
8,SigOnly Precise DistinguishMut - SigOnly Conse...,-0.047,-1.609,1.515,0.515,inf,-0.091,1.0,
9,SigOnly Precise DistinguishMut - Recurse Preci...,-0.14,-1.702,1.422,0.515,inf,-0.272,1.0,


In [12]:
# Summary of `nlines` for every crate / mode combination:
# mean, standard deviation, median (50%) and row count.
group_columns = ['crate', 'context_mode', 'mutability_mode', 'pointer_mode']
summary_columns = ['mean', 'std', '50%', 'count']
df_big.groupby(group_columns)['nlines'].describe()[summary_columns]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,mean,std,50%,count
crate,context_mode,mutability_mode,pointer_mode,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Rocket,Recurse,DistinguishMut,Conservative,2.971014,6.121673,0.0,207.0
Rocket,Recurse,DistinguishMut,Precise,2.951691,6.10726,0.0,207.0
Rocket,Recurse,IgnoreMut,Conservative,4.410628,7.695698,0.0,207.0
Rocket,Recurse,IgnoreMut,Precise,3.714976,6.760172,0.0,207.0
Rocket,SigOnly,DistinguishMut,Conservative,2.975845,6.125262,0.0,207.0
Rocket,SigOnly,DistinguishMut,Precise,2.956522,6.110872,0.0,207.0
Rocket,SigOnly,IgnoreMut,Conservative,4.415459,7.697645,0.0,207.0
Rocket,SigOnly,IgnoreMut,Precise,3.719807,6.762887,0.0,207.0
hyper,Recurse,DistinguishMut,Conservative,1.428571,2.992053,0.0,7.0
hyper,Recurse,DistinguishMut,Precise,1.428571,2.992053,0.0,7.0
