In [None]:
import os
from math import log

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import scipy.stats
import seaborn as sns


# Apply seaborn's default plot styling once, in the first cell, so that it is
# in effect for every figure drawn by the cells below.
sns.set()

In [None]:
# Load the tab-separated "glassermanli" result files from the parent directory
# and stack them into a single frame.
paths = [
    "glassermanli1.txt",
    "glassermanli2.txt",
    "glassermanli3.txt",
    "glassermanli4.txt",
]
# NOTE(review): `list` receives the raw cell text, so every `mu` entry becomes
# a list of single characters -- confirm this is what is intended.
converters = dict(mu=list, glassermanli=float, l=float)
dfs = [pd.read_csv("../" + name, sep='\t', converters=converters) for name in paths]

# Rename the estimate column to "mc" so it lines up with the other result frames.
glassermanlidf = pd.concat(dfs).rename(columns={"glassermanli": "mc"})

# Summary statistics of the estimates, followed by the number of rows loaded.
for column in ['mc']:
    summary = scipy.stats.describe(glassermanlidf[column])
    print(summary)
print(len(glassermanlidf))

# Tag every row with the algorithm that produced it.
glassermanlidf = glassermanlidf.assign(algo='glassermanli_useoptimization')

# Only the final expression of a cell is rendered, so this preview is not shown.
glassermanlidf.head()
glassermanlidf['mc'].dtype

In [None]:
# Load the comma-separated "gl" result files from the parent directory and
# stack them into a single frame.
paths = [
    "gl1.txt",
    "gl2.txt",
    "gl3.txt",
]
dfs = [pd.read_csv("../" + name) for name in paths]
gldf = pd.concat(dfs)

# Summary statistics of the estimates, followed by the number of rows loaded.
for column in ['mc']:
    summary = scipy.stats.describe(gldf[column])
    print(summary)
print(len(gldf))

# Tag every row with the algorithm that produced it, then preview the frame.
gldf = gldf.assign(algo='glassermanli')
gldf.head()

In [None]:
# Load the Bernoulli results (a single comma-separated file in the parent directory).
path = "../bernoulli2000.txt"
bdf = pd.read_csv(path)

# Summary statistics of the estimates, followed by the number of rows loaded.
for column in ['mc']:
    summary = scipy.stats.describe(bdf[column])
    print(summary)
print(len(bdf))

# Tag every row with the algorithm that produced it, then preview the frame.
bdf = bdf.assign(algo='bernoulli')
bdf.head()

In [None]:
# Directory that holds the "compare_methods" logs of the slow Glasserman IS run.
gl_slow_path = os.path.join("..", "..", "dcs_summer", "fullcredit", "RecreatingBinaryCreditProblem", "GlassermanIS_slow")

# Log files to load; the `l0.xx` suffix presumably encodes the tail level of
# each run -- confirm against the producer of these files.
lognames = [
    "compare_methods_S5_l0.01.txt",
    "compare_methods_S5_l0.03.txt",
    "compare_methods_S5_l0.04.txt",
    "compare_methods_S5_l0.05.txt",
    "compare_methods_S5_l0.06.txt",
    "compare_methods_S5_l0.10.txt",
    "compare_methods_S5_l0.15.txt",
    "compare_methods_S5_l0.20.txt",
    "compare_methods_S5_l0.25.txt",
    "compare_methods_S5_l0.30.txt",
    "compare_methods_S5_l0.80.txt",
]
logpaths = [os.path.join(gl_slow_path, name) for name in lognames]

# The first line of every log is skipped before the CSV header is parsed.
dfs = [pd.read_csv(logpath, skiprows=[0]) for logpath in logpaths]
slowdf = pd.concat(dfs)

# Natural log of the running mean, with exact zeros mapped to NaN.
# The original assigned `map(...)` directly, which is a lazy iterator on
# Python 3 and cannot be stored as a column. The vectorized form below works
# on both Python 2 and 3. `.values` keeps the assignment positional, because
# the concatenated frame carries repeated index labels.
# Behavior note: a negative mean now yields NaN (with a NumPy warning) instead
# of raising ValueError from math.log.
means = slowdf['mean'].astype(float)
slowdf['log_mc'] = np.log(means.where(means != 0)).values
slowdf.head()

In [None]:
from collections import defaultdict

# For every (algo, tail) pair, average `log_mc` over all rows of the group
# except the first 10, and collect the (tail, average) points per algorithm.
longrun = defaultdict(list)
for (algo, tail), group in slowdf.groupby(['algo', 'tail']):
    # `.iloc` makes the positional intent of the original `[10:]` explicit.
    # Groups with 10 rows or fewer produce NaN here.
    longrun[algo].append((tail, np.mean(group['log_mc'].iloc[10:])))

# Transpose each list of (tail, average) points into [tails, averages].
# `dict.iteritems()` does not exist on Python 3, and a bare `zip` there is a
# one-shot iterator. Iterating over a snapshot of the keys and materializing
# the zip gives the same result on Python 2 and 3.
for algo in list(longrun):
    longrun[algo] = list(zip(*longrun[algo]))
longrun

In [None]:
# Stack the per-algorithm result frames into one frame for plotting.
df = pd.concat((bdf, gldf, glassermanlidf))
print(len(df))

# Natural log of each estimate, with exact zeros mapped to NaN.
# The original assigned `map(...)` directly, which is a lazy iterator on
# Python 3 and cannot be stored as a column. The vectorized form below works
# on both Python 2 and 3. `.values` keeps the assignment positional, because
# the concatenated frame carries repeated index labels.
# Behavior note: a negative estimate now yields NaN (with a NumPy warning)
# instead of raising ValueError from math.log.
estimates = df['mc'].astype(float)
df['log_mc'] = np.log(estimates.where(estimates != 0)).values
df.head()

In [None]:
# Plot the (log) MC estimates of every algorithm against the tail level `l`
# and save the figure to "plt.pdf".
logscale = True
mc_key = 'log_mc' if logscale else 'mc'

plt.rcParams["figure.figsize"] = (12,12)


def _plot_points(points, label):
    """Draw one series of estimates against `l` as markers on the current axes."""
    plt.plot(points['l'], points[mc_key], 'o', label=label, alpha=0.8)


# Group by the bare column name, not a one-element list. With a list, pandas
# >= 2.0 yields 1-tuple keys such as ('bernoulli',), so the string and
# `mu == 0` comparisons below would never match and every series would fall
# through to the `else` branch with the wrong label.
for algo, group in df.groupby('algo'):
    if algo == 'bernoulli':
        _plot_points(group, "{} (NZ,NE)=(2000, 2000) 30nrun/l".format(algo))
    elif algo == 'glassermanli':
        # Only the mu == 0 runs are drawn. The subgroup gets its own name so
        # the outer `group` is not rebound.
        for mu, mu_group in group.groupby('mu'):
            if mu == 0:
                _plot_points(mu_group, "{} mu:{} (NZ,NE)=(1000, 1000) 10nrun/l".format(algo, mu))
    else:
        # Any remaining algorithm is drawn under the optimization label.
        _plot_points(group, "outer/inner optimization (NZ,NE)=(1000,1000) 10run/l")

# Disabled overlay of the `longrun` averages as thick lines:
# for algo, (xs, ys) in longrun.items():
#     label = "{} (longrun)".format(algo)
#     plt.plot(xs, ys, '-', label=label, lw=5)

plt.legend()
plt.xlim((-0.01,0.62))
plt.ylim((-22.5, 2))
plt.xlabel("tail l")
plt.ylabel("Log MC estimate")
plt.title("Log MC estimates for ~1200runs of bernoulli_mc and ~200runs of glassermanli_mc vs. tail l")
plt.savefig("plt.pdf")