In [None]:
import glob
import time
import pandas as pd
import yaml
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.font_manager as font_manager
import urllib.request
import matplotlib as mpl


# Download the Courier Prime typeface (fetched again on every run of this
# cell) and register it with matplotlib under the family name "courierprime".
font_url = "https://github.com/google/fonts/raw/main/ofl/courierprime/CourierPrime-Regular.ttf"
font_file = "CourierPrime-Regular.ttf"
urllib.request.urlretrieve(font_url, font_file)
fe = mpl.font_manager.FontEntry(fname=font_file, name="courierprime")
mpl.font_manager.fontManager.ttflist.append(fe)

# Colours installed below as the default property cycle of every axes.
color_cycle = ["#444444", "#1BBC9B", "#a895bb", "#F06060", "#F3B562", "#80cedb"]

# Notebook-wide matplotlib defaults, applied in a single update call.
plot_style = {
    # colours
    "axes.facecolor": "#f5f4e9",
    "grid.color": "#AAAAAA",
    "axes.edgecolor": "#333333",
    "figure.facecolor": "#FFFFFFFF",
    "axes.grid": False,
    "axes.prop_cycle": plt.cycler(color=color_cycle),
    # text: use the font registered above
    "font.family": fe.name,
    "font.size": 12,
    # figure geometry: 4.5 wide, height = width / 1.3
    "figure.figsize": (4.5, 4.5 / 1.3),
    "figure.dpi": 100,
    # ticks, images, markers
    "ytick.left": True,
    "xtick.bottom": True,
    "image.cmap": "gist_yarg",
    "lines.markersize": 4,
}
mpl.rcParams.update(plot_style)

## Load Prompts

In [None]:
# Tags looked up in each prompt file's `categories` entry when the prompt
# table is built.
topic_categories = [
    "md",
    "spectroscopy",
    "bio",
    "qm",
    "sim",
    "cheminf",
    "genchem",
    "thermo",
    "stats",
    "plot",
]
code_categories = ["code", "human"]
# Prompts carrying none of these language tags are labelled 'en' by the loader.
lang_categories = ["ch"]
# Fields copied unchanged from every prompt file into the table.
keys = ["name", "language", "context"]

In [None]:
data = None
valid_files = []
# Columns of the prompt table: the copied metadata fields followed by the
# (topic, type, natlang) combination that each row stands for.
prompt_columns = list(keys) + ['topic', 'type', 'natlang']


def insert_row(r, data):
    """Return the column-oriented table `data` with row `r` appended.

    Parameters
    ----------
    r : dict
        One row, mapping column name to value.
    data : dict or None
        Mapping of column name to the list of values collected so far, or
        None when no row has been stored yet.

    Returns
    -------
    dict
        A new mapping holding new lists; the `data` passed in is left
        untouched. The first row defines the columns: later rows contribute
        only the keys already present in `data`.
    """
    if data is None:
        return {k: [v] for k, v in r.items()}
    return {k: v + [r[k]] for k, v in data.items()}


# sorted() keeps the row order and the benchmark file list independent of the
# order in which the file system happens to list the files.
# NOTE(review): without recursive=True, `**` matches exactly one directory
# level - confirm that is the intended layout of ../data.
for fn in sorted(glob.glob('../data/**/*.yml')):
    with open(fn, 'r') as f:
        d = yaml.safe_load(f)
    # The document is fully parsed, so the file is closed before rows are built.
    cat_str = d['categories']
    # NOTE(review): `c in cat_str` is list membership if `categories` is a
    # YAML list, but substring matching if it is a plain string (in which
    # case 'ch' would also match 'cheminf' and 'genchem') - confirm the format.
    tcs = [c for c in topic_categories if c in cat_str]
    ccs = [c for c in code_categories if c in cat_str]
    lcs = [c for c in lang_categories if c in cat_str]
    if len(lcs) == 0:
        lcs = ['en']
    # A file counts as valid as soon as it has a topic, even when it has no
    # code category and therefore contributes no rows below.
    if len(tcs) > 0:
        valid_files.append(fn)
    base = {k: d[k] for k in keys}
    # One row per (topic, type, natural language) combination.
    for tc in tcs:
        for cc in ccs:
            for lc in lcs:
                # Build a fresh dict per row instead of mutating a shared one.
                row = dict(base, topic=tc, type=cc, natlang=lc)
                data = insert_row(row, data)

# When nothing matched, still produce a table with the expected columns so
# that later cells grouping by 'topic' see an empty frame rather than failing
# with a KeyError on a column-less one.
if data is None:
    data = {c: [] for c in prompt_columns}
df = pd.DataFrame.from_dict(data)

In [None]:
# Quick sanity values. A cell only displays its final expression and this
# cell ends with an assignment, so neither of these is shown.
rows_per_topic = df.groupby('topic').count()
n_valid_files = len(valid_files)
# All benchmarkable prompt files as one space-separated string, ready to be
# interpolated into the nlcc-bench command lines.
paths = ' '.join(valid_files)
# Sampling temperatures used for every model.
temperatures = [0.05, 0.2, 0.5]

# One entry per model: (engine identifier, short display name,
# whether the insert-style prompts are run for it, value passed as --n).
model_table = [
    ('incoder/facebook/incoder-1B', 'incoder-1B', True, 5),
    ('incoder/facebook/incoder-6B', 'incoder-6B', True, 1),
    ('openai/code-cushman-001', 'cushman', False, 5),
    ('openai/code-davinci-002', 'davinci', True, 5),
]
# The parallel lists consumed by the other cells, derived from the table so
# they cannot drift out of step with one another.
models = [engine for engine, _, _, _ in model_table]
model_names = [short for _, short, _, _ in model_table]
can_insert = [flag for _, _, flag, _ in model_table]
ks = [n for _, _, _, n in model_table]

## Programmatic Prompts

In [None]:

# Run the `nlcc-bench` command line tool for every model and temperature,
# writing one CSV per prompt variant into the working directory. File names
# carry the temperature's index `i`, not its value.
for ins, m, mn, k in zip(can_insert, models, model_names, ks):
    for i,t in enumerate(temperatures):
        # Variant 1: `--prompt python`.
        # NOTE(review): presumably replaces each prompt's own context with a
        # generic Python one (the merge cell labels these rows 'none') -
        # confirm against the nlcc-bench documentation.
        out = f'override_{mn}_{i}.csv'
        # Progress marker; only this first file name of the iteration is printed.
        print(out)
        !nlcc-bench $paths $out --n $k --prompt python --temperature $t --engine $m
        # Variant 2: no --prompt option.
        out = f'bench_{mn}_{i}.csv'
        !nlcc-bench $paths $out --n $k --temperature $t --engine $m
        # Variant 3: `--prompt insert`, only for models flagged in `can_insert`.
        if ins:
            out = f'insert_bench_{mn}_{i}.csv'
            !nlcc-bench $paths $out --n $k --prompt insert --temperature $t --engine $m
        # Variant 4: a copyright/licence comment header. The prompt value is
        # prefixed 'header-insert:' for insert-capable models and 'header:'
        # otherwise; the \n escapes become real newlines inside the quoted
        # shell argument.
        out = f'cheader_bench_{mn}_{i}.csv'
        insert_text = '-insert' if ins else ''
        header = f'header{insert_text}:# MIT License\n\n# Copyright (c) 2022 University of Rochester\n\n'
        !nlcc-bench $paths $out --n $k --prompt "$header" --temperature $t --engine $m        
        # Variant 5: an "expert programmer" comment header, same prefix rule.
        out = f'lheader_bench_{mn}_{i}.csv'
        header = f'header{insert_text}:# This is written by an expert Python programmer\n\n'
        !nlcc-bench $paths $out --n $k --prompt "$header" --temperature $t --engine $m


In [None]:
def _read_bench(path, relabel=None):
    """Load one benchmark result file and attach the prompt metadata.

    Parameters
    ----------
    path : str
        Whitespace-delimited result file written by the benchmark cell.
    relabel : callable, optional
        Receives the merged `context` column (a Series) and returns its
        replacement, either a scalar or a Series. When omitted the column is
        kept as it comes from the prompt table.

    Returns
    -------
    pandas.DataFrame
        The file's rows inner-joined with `df` on `name`. Result rows whose
        name is absent from `df` are dropped, and a name that occurs on
        several rows of `df` yields one output row per occurrence.
    """
    # sep=r'\s+' is the documented equivalent of the deprecated
    # delim_whitespace=True.
    bf = pd.read_csv(path, sep=r'\s+')
    bf = pd.merge(bf, df, how='inner', on='name')
    if relabel is not None:
        # assign() builds a new column instead of writing through `.values`,
        # which only works while pandas hands back a writable view.
        bf = bf.assign(context=relabel(bf['context']))
    return bf


# Collect every per-variant frame first and concatenate once at the end.
frames = []
for i, t in enumerate(temperatures):
    for ins, m, mn in zip(can_insert, models, model_names):
        # `--prompt python` runs: the context is recorded as 'none'.
        frames.append(_read_bench(f'override_{mn}_{i}.csv', lambda c: 'none'))
        # Runs without --prompt keep the prompt's own context label.
        frames.append(_read_bench(f'bench_{mn}_{i}.csv'))
        # Insert runs exist only for models flagged in `can_insert`.
        if ins:
            frames.append(_read_bench(f'insert_bench_{mn}_{i}.csv', lambda c: c + '-insert'))
        # Header variants are tagged by suffix so short_context can tell them apart.
        frames.append(_read_bench(f'cheader_bench_{mn}_{i}.csv', lambda c: c + '-copyright'))
        frames.append(_read_bench(f'lheader_bench_{mn}_{i}.csv', lambda c: c + '-authority'))

# As before, `merged` stays None when there was nothing to load.
merged = pd.concat(frames) if frames else None
def short_context(c):
    """Collapse a full context label into the short name shown on plot axes.

    'none' is returned unchanged. Otherwise the first of 'copyright',
    'authority' and 'insert' (checked in that order) that occurs as a
    substring of `c` is returned, and 'custom' when none of them does.
    """
    if c == 'none':
        return c
    for marker in ('copyright', 'authority', 'insert'):
        if marker in c:
            return marker
    return 'custom'

def short_model(c):
    """Translate an engine identifier into its short display name.

    `c` is looked up in `models` and the entry of `model_names` at the same
    position is returned. An identifier missing from `models` raises the
    ValueError of `list.index`.
    """
    return model_names[models.index(c)]
    
# Add the two columns the plots facet on: the shortened context label and the
# short model name derived from the engine identifier.
merged = merged.assign(
    used_context=merged.context.apply(short_context),
    model=merged.engine.apply(short_model),
)
# Cache the combined table on disk; a later cell reads this file back.
merged.to_pickle('promp_results.pkl')
merged.tail()

In [None]:
# Read back the table pickled by the merge cell, so the cells below can start
# from the cached file when it already exists.
results_file = 'promp_results.pkl'
merged = pd.read_pickle(results_file)

In [None]:
# Left-to-right order of the context variants on every x axis.
context_order = ['none', 'custom', 'insert', 'copyright', 'authority']

# One grid per model: `result` against context variant, one panel per topic
# and one hue level per temperature. Each grid is saved as PDF and PNG.
for mn in model_names:
    # FacetGrid creates its own figure, so no plt.figure() call precedes it;
    # one would only leave a blank extra figure behind for every model.
    g = sns.FacetGrid(merged[merged.model == mn], col='topic', col_wrap=5, height=1.4, aspect=1.5, hue_order=temperatures)
    g.map(sns.pointplot, 'used_context', 'result', 'temperature', 
          palette='Set2', dodge=True, hue_order=temperatures, order=context_order)
    g.add_legend(title='Temperature', loc='upper left', bbox_to_anchor=(0.87, 1))
    g.set_axis_labels('', 'Accuracy')
    # Rotate the tick labels of each panel; map() makes the panel current
    # before calling the function, so plt.gca() is that panel's axes.
    g.map(lambda **kwargs: plt.gca().set_xticklabels(plt.gca().get_xticklabels(), rotation=45))
    g.set_titles('{col_name}')
    g.savefig(f'{mn}-accuracy.pdf')
    g.savefig(f'{mn}-accuracy.png', dpi=300)

In [None]:
# Compare the models at temperature 0.2: `result` against context variant,
# one panel per topic and one hue level per model.
# FacetGrid creates its own figure, so no plt.figure() call precedes it; one
# would only leave a blank extra figure behind.
g = sns.FacetGrid(merged[merged.temperature == 0.2], col='topic', 
                  col_wrap=5, height=1.4, aspect=1.5, hue_order=model_names)
g.map(sns.pointplot, 'used_context', 'result', 'model', hue_order=model_names, order=context_order,
      palette='Set2', dodge=True)
g.add_legend(title='Model', loc='upper left', bbox_to_anchor=(0.87, 1))
g.set_axis_labels('', 'Accuracy')
# Rotate the tick labels of each panel; map() makes the panel current before
# calling the function, so plt.gca() is that panel's axes.
g.map(lambda **kwargs: plt.gca().set_xticklabels(plt.gca().get_xticklabels(), rotation=45))
g.set_titles('{col_name}')
g.savefig('model-comparison.pdf')
g.savefig('model-comparison.png', dpi=300)

In [None]:
# LaTeX table of the per-topic non-null count of the first column of `df`.
# print() is deliberate: the raw LaTeX source is wanted, not a rendered table.
per_topic = df.groupby('topic').count()
first_column_counts = per_topic.iloc[:, 0]
print(first_column_counts.to_latex())

In [None]:
# One panel per prompt name: `result` against context variant with one hue
# level per model, over all temperatures.
# FacetGrid creates its own figure, so no plt.figure() call precedes it; one
# would only leave a blank extra figure behind.
g = sns.FacetGrid(merged, col='name', col_wrap=5, height=2.5, aspect=2, hue_order=model_names)
g.map(sns.pointplot, 'used_context', 'result', 'model',  hue_order=model_names, order=context_order,
      palette='Set2', dodge=True)
g.add_legend(title='Model')
g.set_axis_labels('Context', 'Accuracy')
g.set_titles('{col_name}')
g.savefig('all.pdf')
g.savefig('all.png', dpi=300)

In [None]:
# Show the x-axis category order that the plotting cells pass as `order=`.
print(context_order)

In [None]:
def _label_context_ticks(**kwargs):
    """Label the current axes' x ticks with `context_order`, rotated 45 degrees.

    The keyword arguments that FacetGrid.map passes in are accepted and ignored.
    """
    plt.gca().set_xticklabels(context_order, rotation=45)


# Bar chart of `result` per context variant: one panel per model (two panels
# per row) and one bar colour per temperature.
g = sns.catplot(
    data=merged,
    kind="bar",
    col="model",
    col_wrap=2,
    x="used_context",
    y="result",
    hue="temperature",
    order=context_order,
    palette="dark",
    alpha=.6,
    height=2.5,
)
g.set_axis_labels('', 'Accuracy')
g.map(_label_context_ticks)
g.set_titles('{col_name} model')
g.savefig('context.pdf')
g.savefig('context.png', dpi=300)

In [None]:
# Summary for incoder-6B with the copyright header: take the maximum `result`
# of every prompt over all of its rows, then average those maxima per topic.
selected = merged[(merged.model == 'incoder-6B') & (merged.used_context == 'copyright')]
# Only the two columns that are needed are aggregated. Averaging the whole
# frame would also try to average its string columns, which pandas >= 2.0
# rejects with a TypeError instead of silently dropping them.
best_per_prompt = selected.groupby('name')[['topic', 'result']].max()
topic_accuracy = best_per_prompt.groupby('topic')['result'].mean()
# print() is deliberate for the first value: the raw LaTeX source is wanted.
print(topic_accuracy.to_latex())
# Unweighted mean of the per-topic averages.
print(topic_accuracy.mean())

In [None]:
# Number of distinct prompt names present in the merged results.
distinct_names = merged['name'].unique()
len(distinct_names)