In [50]:
# COLLAPSE CELL
# PMsearch np.v*
#x = data['mass']
#x?

# from jupyterthemes import jtplot
# jtplot.style(theme='solarized')
# from jupyterlab_table import JSONTable
# JSONTable(df)

from pprint import pprint
import math
import statsmodels.stats as sms
import statsmodels.api as sm
import statsmodels.regression as smr
import scipy.stats as stats
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(color_codes=True)

# 04atplotlib inline
# %load_ext heat

plt.ion()
# plt.ioff()

# %heat

import os 
# dir_path = os.path.dirname(os.path.realpath(__file__))
cwd = os.getcwd()

# fig, ax = plt.subplots()
# ax.plot(x, y)

In [51]:
import scipy.stats as scs
import numpy as np
import random


class ipyBanditStrategy(object):
    '''
    Implements a online, learning strategy to solve the Multi-Armed Bandit
      problem.

    Methods
        --------
        sample_bandits(n): sample and train on n pulls.

        Attributes
        -----------
        N : the cumulative number of samples
        choices : the historical choices as a (N,) array
        bb_score: the historical score as a (N,) array
        '''

    def __init__(self, bandits, choice_function, seed=None, **kwargs):
        '''
        Initializes the BanditStrategy given an instance of the Bandits class
          and a choice function.

        Parameters
            -----------
            bandits : a Bandit class with .pull method
            choice_function : str indicating the choice function or a custom
                choice fuction accepting a self argument (which gives access to
                all the BanditStrategy's attributes) which returns an int
                between 0 and n-1
                Accepted str inputs include 'max_mean', 'random_choice',
                'epsilon_greedy', 'softmax', 'ucb1', and 'bayesian_bandit'
            seed : int setting the random seed or None to not seed the random
                number generator
            Also supports passing keyword args which can be used in a
                particular choice function.  Namely 'epsilon_greedy' accepts
                epsilon=float and 'softmax' accepts tau=float
                e.g. strat = BanditStrategy(bandits, 'epsilon_greedy',
                  epsilon=0.1)
        '''
        choice_dict = {'max_mean': self._max_mean,
                       'random_choice': self._random_choice,
                       'epsilon_greedy': self._epsilon_greedy,
                       'softmax': self._softmax,
                       'ucb1': self._ucb1,
                       'bayesian_bandit': self._bayesian_bandit}
        self.bandits = bandits
        n_bandits = len(self.bandits)
        self.wins = np.zeros(n_bandits)
        self.trials = np.zeros(n_bandits)
        self.N = 0
        self.choices = []
        self.score = []
        self.seed = seed
        self._kwargs = kwargs
        if isinstance(choice_function, str):
            if choice_function in choice_dict:
                self.choice_function = choice_dict[choice_function]
            else:
                raise ValueError("{0} not a valid choice function. Valid choices include {1}".format(choice_function, choice_dict.keys()))
        elif not hasattr(choice_function, '__call__'):
            raise ValueError("{0} is not a valid input. Must be str or callable function accepting self".format(choice_function))
        else:
            BanditStrategy.choice_function = choice_function

    def sample_bandits(self, n=1):
        ''' Simulate n rounds of running the bandit machine

        Parameters
        -----------
        n : int number of rounds
        '''
        score = np.zeros(n)
        choices = np.zeros(n)

        # seed the random number generators so you get the same results every
        # time.
        if self.seed:
            np.random.seed(self.seed)
            random.seed(self.seed)

        for k in range(n):
            #sample from the bandits's priors, and select the largest sample
            choice = self.choice_function()

            #sample the chosen bandit
            result = self.bandits.pull(choice)

            #update priors and score
            self.wins[choice] += result
            self.trials[choice] += 1
            score[k] = result
            self.N += 1
            choices[k] = choice

        self.score = np.r_[self.score, score]
        self.choices = np.r_[self.choices, choices]

    def _max_mean(self):
        ''' Pick the bandit with the current best observed proportion of winning

        Returns
        --------
        int : index of the winning bandit
        '''
        # make sure to play each bandit at least once
        if self.trials.min() == 0:
            return np.argmin(self.trials)
        return np.argmax(self.wins / self.trials)

    def _random_choice(self):
        ''' Pick a bandit uniformly at random

        Returns
        --------
        int : index of the winning bandit
        '''
        return np.random.randint(0, len(self.wins))

    def _epsilon_greedy(self):
        '''
        Pick a bandit uniformly at random epsilon percent of the time.
        Otherwise pick the bandit with the best observed proportion of winning

        Returns
        --------
        int : index of the winning bandit
        '''
        # Set default value of epsilon if not provided in init
        epsilon = self._kwargs.get('epsilon', 0.1)

        # Verify that we have attempted each bandit at least once
        if self.trials.min() == 0:
            return np.argmin(self.trials)
        if random.random() < epsilon:
            # Exploration
            return np.random.randint(len(self.bandits))
        else:
            return np.argmax(self.wins / self.trials)

    def _softmax(self):
        ''' Pick an bandit according to the Boltzman Distribution

        Returns
        --------
        int : index of the winning bandit
        '''
        # Set default value of tau if not provided in init
        tau = self._kwargs.get('tau', 0.01)

        # Verify that we have attempted each bandit at least once
        if self.trials.min() == 0:
            return np.argmin(self.trials)

        mean = self.wins / self.trials
        scaled = np.exp(mean / tau)
        probs = scaled / np.sum(scaled)
        return np.random.choice(range(0, len(self.bandits)), p=probs)

    def _ucb1(self):
        ''' Pick the bandit according to the UCB1 strategy

        Returns
        --------
        int : index of the winning bandit
        '''
        # Verify that we have attempted each bandit at least once
        if self.trials.min() == 0:
            return np.argmin(self.trials)

        means = self.wins / self.trials
        confidence_bounds = np.sqrt((2. * np.log(self.N)) / self.trials)
        upper_confidence_bounds = means + confidence_bounds
        return np.argmax(upper_confidence_bounds)

    def _bayesian_bandit(self):
        '''
        Randomly sample from a beta distribution for each bandit and pick
        the one with the largest value

        Returns
        --------
        int : index of the winning bandit
        '''
        samples = [np.random.beta(a=1 + wins, b=1 + trials - wins)
                   for wins, trials in zip(self.wins, self.trials)]
        return np.argmax(samples)


In [52]:
from src.bandits import Bandits
# from src.banditstrategy import BanditStrategy
# from src.banditstrategy_attempt_1 import BanditStrategy

In [53]:
prob_split = [0.05, 0.03, 0.2]

In [54]:
# random choice
bandits = Bandits(prob_split)
strat_rc = ipyBanditStrategy(bandits, 'random_choice')
strat_rc.sample_bandits(1000)
print("Note: Default = splits evenly       | ", "Number of trials: ", strat_rc.trials) 
print("Note: Default = highest weight wins | ", "Number of wins:   ", strat_rc.wins)
print("Note: Should match your weights     | ", "Conversion rates: ", strat_rc.wins / strat_rc.trials )
print("Note: Should be weighted average    | ", "A total of %d wins of %d trials." % \
    (strat_rc.wins.sum(), strat_rc.trials.sum()))



Note: Default = splits evenly       |  Number of trials:  [371. 338. 291.]
Note: Default = highest weight wins |  Number of wins:    [18.  5. 45.]
Note: Should match your weights     |  Conversion rates:  [0.04851752 0.0147929  0.15463918]
Note: Should be weighted average    |  A total of 68 wins of 1000 trials.


In [55]:
# max_mean
bandits = Bandits(prob_split)
strat_mm = ipyBanditStrategy(bandits, 'max_mean')
strat_mm.sample_bandits(1000)
print("Note: Default = splits evenly       | ", "Number of trials: ", strat_mm.trials) 
print("Note: Default = highest weight wins | ", "Number of wins:   ", strat_mm.wins)
print("Note: Should match your weights     | ", "Conversion rates: ", strat_mm.wins / strat_mm.trials )
print("Note: Should be weighted average    | ", "A total of %d wins of %d trials." % \
    (strat_mm.wins.sum(), strat_mm.trials.sum()))



Note: Default = splits evenly       |  Number of trials:  [998.   1.   1.]
Note: Default = highest weight wins |  Number of wins:    [48.  0.  0.]
Note: Should match your weights     |  Conversion rates:  [0.04809619 0.         0.        ]
Note: Should be weighted average    |  A total of 48 wins of 1000 trials.


In [56]:
# epsilon_greedy
bandits = Bandits(prob_split)
strat_eg = ipyBanditStrategy(bandits, 'epsilon_greedy')
strat_eg.sample_bandits(1000)
print("Note: Default = splits evenly       | ", "Number of trials: ", strat_eg.trials) 
print("Note: Default = highest weight wins | ", "Number of wins:   ", strat_eg.wins)
print("Note: Should match your weights     | ", "Conversion rates: ", strat_eg.wins / strat_eg.trials )
print("Note: Should be weighted average    | ", "A total of %d wins of %d trials." % \
    (strat_eg.wins.sum(), strat_eg.trials.sum()))



Note: Default = splits evenly       |  Number of trials:  [ 67.  32. 901.]
Note: Default = highest weight wins |  Number of wins:    [  3.   0. 178.]
Note: Should match your weights     |  Conversion rates:  [0.04477612 0.         0.19755827]
Note: Should be weighted average    |  A total of 181 wins of 1000 trials.


In [57]:
# softmax
bandits = Bandits(prob_split)
strat_sm = ipyBanditStrategy(bandits, 'softmax')
strat_sm.sample_bandits(1000)
print("Note: Default = splits evenly       | ", "Number of trials: ", strat_sm.trials) 
print("Note: Default = highest weight wins | ", "Number of wins:   ", strat_sm.wins)
print("Note: Should match your weights     | ", "Conversion rates: ", strat_sm.wins / strat_sm.trials )
print("Note: Should be weighted average    | ", "A total of %d wins of %d trials." % \
    (strat_sm.wins.sum(), strat_sm.trials.sum()))



Note: Default = splits evenly       |  Number of trials:  [  1.   1. 998.]
Note: Default = highest weight wins |  Number of wins:    [  0.   0. 206.]
Note: Should match your weights     |  Conversion rates:  [0.         0.         0.20641283]
Note: Should be weighted average    |  A total of 206 wins of 1000 trials.


In [58]:
# bayesian bandit
bandits = Bandits(prob_split)
strat_bb = ipyBanditStrategy(bandits, 'bayesian_bandit')
strat_bb.sample_bandits(1000)
print("Note: Default = splits evenly       | ", "Number of trials: ", strat_bb.trials) 
print("Note: Default = highest weight wins | ", "Number of wins:   ", strat_bb.wins)
print("Note: Should match your weights     | ", "Conversion rates: ", strat_bb.wins / strat_bb.trials )
print("Note: Should be weighted average    | ", "A total of %d wins of %d trials." % \
    (strat_bb.wins.sum(), strat_bb.trials.sum()))



Note: Default = splits evenly       |  Number of trials:  [ 24.  27. 949.]
Note: Default = highest weight wins |  Number of wins:    [  0.   1. 188.]
Note: Should match your weights     |  Conversion rates:  [0.         0.03703704 0.19810327]
Note: Should be weighted average    |  A total of 189 wins of 1000 trials.


In [59]:
# for i in dir(BanditStrategy):
#    print (i,"  ",type(getattr(BanditStrategy,i)))

In [60]:
plt.style.use('ggplot')

In [61]:
def sample_bandits(bandit_probs, choice_funcs, num_trials=1000, seed=42):
    ''' Sample bandits a give number of times with every choice function
    Print out the resulting number of wins for each choice strategy

    Parameters
    -----------
    bandit_probs : Array of floats (0 to 1)
        Indicates the underlying bandit probabilites
    choice_funcs : Array or List of str
        str indicating which choice functions to simulate
    num_trials : int (default=1000)
        Number of trials to conduct
    seed : int or None (default=42)
        Number to seed our BanditStrategy with for reproducibility
    '''
    print('True Bandit Probabilities: {}'.format(bandit_probs))
    for func in choice_funcs:
        bandits = Bandits(bandit_probs)
        strat = BanditStrategy(bandits, func, seed)
        strat.sample_bandits(num_trials)
        print("\t{} wins with {}".format(strat.wins.sum(), func))
    print('\n')

In [67]:
bandit_probs = [[0.1, 0.1, 0.1, 0.1, 0.9],
                [0.1, 0.1, 0.1, 0.1, 0.12],
                [0.1, 0.2, 0.3, 0.4, 0.5]]
choice_functions = ['max_mean', 'random_choice',
                    'epsilon_greedy', 'softmax', 'ucb1', 'bayesian_bandit']

for probs in bandit_probs:
    sample_bandits(probs, choice_functions)

True Bandit Probabilities: [0.1, 0.1, 0.1, 0.1, 0.9]
	897.0 wins with max_mean
	263.0 wins with random_choice


TypeError: list indices must be integers or slices, not NoneType

2.718281828459045

In [6]:
def regret(bandit_probs, choices):
    '''
    INPUT: array of floats (0 to 1), array of ints
    OUTPUT: array of floats

    Take an array of the true probabilities for each machine and an
    array of the indices of the machine played at each round.
    Return an array giving the total regret after each round.
    '''
    p_opt = np.max(probabilities)
    return np.cumsum(p_opt - probabilities[choices])
bandit_probs = [[0.1, 0.1, 0.1, 0.1, 0.9],
                [0.1, 0.1, 0.1, 0.1, 0.12],
                [0.1, 0.2, 0.3, 0.4, 0.5]]
choices = strat.choices.astype(int)
