In [None]:
import hashlib
import json
import os
import subprocess as sp
from glob import glob

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from iterextras import par_for

# Apply seaborn's default styling to every figure drawn later in the notebook.
sns.set()

In [None]:
# Load every non-empty JSON file under ../data into one long DataFrame.
# Each file contributes its rows plus a `crate` column holding the file's
# base name without the extension.
all_dfs = []
for f in glob('../data/*.json'):
    # Zero-byte files are skipped.
    if os.stat(f).st_size == 0:
        continue
    crate = os.path.splitext(os.path.basename(f))[0]
    # Use a context manager so the handle is closed right after parsing
    # instead of leaking one open file per crate.
    with open(f) as json_file:
        crate_df = pd.read_json(json_file)
    crate_df['crate'] = crate
    all_dfs.append(crate_df)
df = pd.concat(all_dfs)

In [None]:
def output_nlines(output):
    """Count the distinct line numbers covered by a collection of line ranges.

    Parameters
    ----------
    output : iterable of mappings
        Each element must provide integer ``'start_line'`` and ``'end_line'``
        entries; both ends are treated as inclusive.

    Returns
    -------
    int
        Number of distinct lines covered. Lines covered by several ranges are
        counted once; an empty ``output`` (or only reversed ranges) gives 0.
    """
    # Collect lines straight into a set. Flattening with sum(lists, [])
    # re-copies the accumulated list for every range, which is quadratic.
    return len({
        line
        for rng in output
        for line in range(rng['start_line'], rng['end_line'] + 1)
    })
# Distinct lines covered by each row's `output` ranges.
df['nlines'] = df['output'].map(output_nlines)

def function_nlines(rng):
    """Return the number of lines spanned by one inclusive line range.

    ``rng`` must provide integer ``'start_line'`` and ``'end_line'`` entries.
    A range whose end precedes its start yields 0.
    """
    first_line = rng['start_line']
    last_line = rng['end_line']
    # end_line is inclusive, hence the + 1.
    return len(range(first_line, last_line + 1))
# Number of lines spanned by each row's `function_range`.
df['func_nlines'] = df['function_range'].map(function_nlines)


def _slice_offset(row):
    """Lines between the start of the function range and the start of the slice."""
    return row['slice']['start_line'] - row['function_range']['start_line']


df['slice_offset'] = df.apply(_slice_offset, axis=1)

In [None]:
def _stable_slice_key(s):
    """Return a signed 64-bit integer key identifying the slice dict ``s``.

    The key is taken from a SHA-256 digest of the dict's canonical JSON form
    (keys sorted), so equal dicts get equal keys and the value is the same in
    every Python process. The built-in ``hash()`` is salted per process for
    ``str`` inputs (PYTHONHASHSEED), which made the previous keys change on
    every kernel restart.
    """
    # default=str keeps non-JSON-native scalars serialisable.
    canonical = json.dumps(s, sort_keys=True, default=str).encode('utf-8')
    digest = hashlib.sha256(canonical).digest()
    # First 8 bytes, signed, so the value fits an int64 column like hash() did.
    return int.from_bytes(digest[:8], 'big', signed=True)


df['slice_key'] = df['slice'].map(_stable_slice_key)
# NOTE(review): 0xfffffff has seven hex digits, so this keeps the low 28 bits,
# not the 32 the column name suggests. The mask is left unchanged so the value
# range stays the same -- confirm which width is intended.
df['slice_key_32bit'] = df['slice_key'].map(lambda n: n & 0xfffffff)

In [None]:
from pymer4.models import Lmer

# Keep only rows flagged `contains_call_with_ref` whose slice starts more than
# 10 lines after the start of its function range.
df_big = df[df['contains_call_with_ref'] & df['slice_offset'].gt(10)]

# Mixed-effects model of `nlines`: full interaction of the three analysis modes
# with `slice_offset`, plus a random intercept per crate.
model = Lmer(
    'nlines ~ context_mode * mutability_mode * pointer_mode * slice_offset + (1 | crate)',
    data=df_big[[
        'nlines',
        'context_mode',
        'mutability_mode',
        'pointer_mode',
        'slice_offset',
        'crate',
    ]],
)

# The three mode columns are passed as factors with their levels listed explicitly.
model.fit(
    factors={
        'mutability_mode': ['DistinguishMut', 'IgnoreMut'],
        'context_mode': ['Recurse', 'SigOnly'],
        'pointer_mode': ['Precise', 'Conservative'],
    }
)

In [None]:
# Post-hoc analysis over the three mode factors. Only the second element
# returned by post_hoc is kept and displayed (presumably the pairwise
# comparison table -- confirm against the pymer4 documentation).
_, t = model.post_hoc(
    marginal_vars=['context_mode', 'pointer_mode', 'mutability_mode'],
)
t

In [None]:
# Mean, standard deviation, median and row count of `nlines` for every
# crate / mode combination in the filtered data.
(
    df_big
    .groupby(['crate', 'context_mode', 'mutability_mode', 'pointer_mode'])['nlines']
    .describe()
    [['mean', 'std', '50%', 'count']]
)