In [None]:
# default_exp abstractions

# abstractions
> Abstract base classes for multi-armed bandit (MAB) problems and their solvers

## MAB


In [None]:
#export
import numpy as np

class AbstractNoncontextualMAB:
    """Interface for a multi-armed bandit that takes no context.

    All three methods are placeholders that do nothing and return ``None``;
    the class is meant to be subclassed with real implementations.
    """

    def get_probaility(self, arm):
        """Placeholder for the probability associated with ``arm``; returns ``None``."""

    def get_reward(self, arm):
        """Placeholder for the reward obtained by playing ``arm``; returns ``None``."""

    def get_best_arm(self):
        """Placeholder for reporting the best arm; returns ``None``."""
    
    
class AbstractContextualMAB:
    """Interface for a multi-armed bandit whose methods also receive a context.

    The constructor stores the parameters and selects an activation function;
    the three ``get_*`` methods are placeholders that return ``None``.
    """

    def __init__(self, theta, noise, linear=False):
        """Store the bandit parameters and pick the activation.

        Parameters
        ----------
        theta : sized collection
            Stored as ``self.theta``; its length becomes ``self.num_arms``.
        noise :
            Stored unchanged as ``self.noise``.
        linear : bool, default False
            When truthy the activation is the identity, otherwise it is the
            logistic sigmoid ``1 / (1 + exp(-x))``.
        """
        self.theta = theta
        self.noise = noise
        self.num_arms = len(theta)
        self.activation = (
            (lambda x: x)
            if linear
            else (lambda x: 1 / (1 + np.exp(-x)))
        )

    def get_probaility(self, arm, context):
        """Placeholder for the probability of ``arm`` given ``context``; returns ``None``."""

    def get_reward(self, arm, context):
        """Placeholder for the reward of ``arm`` given ``context``; returns ``None``."""

    def get_best_arm(self, context):
        """Placeholder for the best arm given ``context``; returns ``None``."""
    
    
    


In [None]:
# With linear=False the activation is the logistic sigmoid, so 9 maps close to 1.
a = AbstractContextualMAB(theta=[0.1, 0.9], noise=0, linear=False)
a.activation(9)

0.9998766054240137

In [None]:
# With linear=True the activation is the identity, so 9 is returned unchanged.
b = AbstractContextualMAB(theta=[0.1, 0.9], noise=0, linear=True)
b.activation(9)

9

## Solvers

In [None]:
#export
class AbstractSolver:
    """Interface of a bandit solver that works without a context.

    Both methods raise ``NotImplementedError`` until a subclass overrides them.
    """

    def choose_arm(self):
        """Select the next arm to play according to the solver's own policy."""
        raise NotImplementedError()

    def update(self, arm, reward):
        """Revise the solver's policy using the ``reward`` observed for ``arm``."""
        raise NotImplementedError()


In [None]:
#export
from collections import OrderedDict


class AbstractContextualSolver(object):
    """Base class for contextual solvers that keep one model per arm.

    ``choose_arm`` and ``update`` raise ``NotImplementedError`` until a
    subclass overrides them.
    """

    def __init__(self, model_type, num_arms, num_context, model_params=None):
        """Instantiate one model per arm.

        Parameters
        ----------
        model_type : callable
            Called as ``model_type(num_context, **model_params)`` once per arm.
        num_arms : int
            Number of arms, and therefore number of models created.
        num_context : int
            Passed to every model as its first positional argument.
        model_params : dict or None, default None
            Extra keyword arguments for ``model_type``; ``None`` means none.
        """
        # A ``None`` default avoids sharing one mutable dict between all calls.
        if model_params is None:
            model_params = {}
        self.model_list = [model_type(num_context, **model_params) for _ in range(num_arms)]
        self.num_arms = num_arms
        self.num_context = num_context

    def choose_arm(self, context):
        """choose an arm to play according to internal policy"""
        raise NotImplementedError

    def update(self, arm, context, reward):
        """ update internal policy to reflect changed knowledge"""
        raise NotImplementedError
        
        
        
# class AbstractContextualSolverSingleModel(object):
#     def __init__(self, model_type, num_arms, num_context, model_params={}):
#         self.model = model_type(num_context+num_arms, **model_params) 
#         self.num_arms = num_arms
        
        

#         num_arms = 3
#         zero_arm = np.ones(num_arms)
#         self.arms = OrderedDict({x: np.ones(num_arms) for x in range(num_arms+1)})
#         for x in self.arms:
#             self.arms[x][x %3] -=1
            
#         self.arms[num_arms+1] = zero_arm
        
#         self.num_context = num_context


#     def choose_arm(self,context):
#         """choose an arm to play according to internal policy"""
#         raise NotImplementedError
        
#     def update(self, arm, context, reward):
#         """ update internal policy to reflect changed knowledge"""
#         raise NotImplementedError
        
        
        
class AbstractContextualSolverSingleModel(object):
    """Base class for contextual solvers that share a single model across arms.

    The constructor builds ``self.arms``, an ``OrderedDict`` of
    ``2 * num_arms + 1`` float vectors of length ``num_arms``:

    * keys ``0 .. num_arms - 1``: all ones with a zero at the key's index,
    * keys ``num_arms .. 2 * num_arms - 1``: one-hot vectors,
    * key ``2 * num_arms``: the all-ones vector.

    ``choose_arm`` and ``update`` raise ``NotImplementedError`` until a
    subclass overrides them.
    """

    def __init__(self, model_type, num_arms, num_context, model_params=None):
        """Create the shared model and the arm encodings.

        Parameters
        ----------
        model_type : callable
            Called once as ``model_type(num_context + num_arms, **model_params)``.
        num_arms : int
            Number of arms; also the length of every encoding vector.
        num_context : int
            Stored as ``self.num_context`` and added to ``num_arms`` for the
            model's first positional argument.
        model_params : dict or None, default None
            Extra keyword arguments for ``model_type``; ``None`` means none.
        """
        # A ``None`` default avoids sharing one mutable dict between all calls.
        if model_params is None:
            model_params = {}
        self.model = model_type(num_context + num_arms, **model_params)
        self.num_arms = num_arms
        self.errors = []  # starts empty; nothing in this class appends to it

        identity = np.eye(num_arms)
        self.arms = OrderedDict()
        # Leave-one-out encodings: ones everywhere except the arm's own index.
        for arm in range(num_arms):
            self.arms[arm] = 1.0 - identity[arm]
        # One-hot encodings; copy so each entry owns its data, not a view of `identity`.
        for arm in range(num_arms):
            self.arms[arm + num_arms] = identity[arm].copy()
        # Final entry is the all-ones vector.
        self.arms[2 * num_arms] = np.ones(num_arms)

        self.num_context = num_context

    def choose_arm(self, context):
        """choose an arm to play according to internal policy"""
        raise NotImplementedError

    def update(self, arm, context, reward):
        """ update internal policy to reflect changed knowledge"""
        raise NotImplementedError
     


In [None]:
num_arms = 3
# arms1 starts as all-ones vectors, arms2 as all-zeros vectors keyed after arms1.
arms1 = OrderedDict((key, np.ones(num_arms)) for key in range(num_arms))
arms2 = OrderedDict((key + num_arms, np.zeros(num_arms)) for key in range(num_arms))

# Clear one entry per arms1 vector and set the matching entry in arms2.
for x in range(num_arms):
    arms1[x][x] -= 1
    arms2[num_arms + x][x] += 1

In [None]:
# Keys 0..num_arms-1: all-ones vectors with a zero at the key's own index.
arms1

OrderedDict([(0, array([0., 1., 1.])),
             (1, array([1., 0., 1.])),
             (2, array([1., 1., 0.]))])

In [None]:
# Keys num_arms..2*num_arms-1: one-hot vectors.
arms2

OrderedDict([(3, array([1., 0., 0.])),
             (4, array([0., 1., 0.])),
             (5, array([0., 0., 1.]))])

In [None]:
# nbdev_build_docs

In [None]:
# Merge the one-hot encodings into arms1; note that arms1 is modified in place.
arms1.update(arms2)
arms1

OrderedDict([(0, array([0., 1., 1.])),
             (1, array([1., 0., 1.])),
             (2, array([1., 1., 0.])),
             (3, array([1., 0., 0.])),
             (4, array([0., 1., 0.])),
             (5, array([0., 0., 1.]))])

In [None]:
from thompson_sampling.models import BayesLinReg


Bad key "text.kerning_factor" on line 4 in
/home/thomas/anaconda3/envs/pytorch_GPU/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/_classic_test_patch.mplstyle.
You probably need to get an updated matplotlibrc file from
https://github.com/matplotlib/matplotlib/blob/v3.1.3/matplotlibrc.template
or from the matplotlib source distribution


In [None]:
# BayesLinReg is instantiated with num_context + num_arms = 6 as its first argument.
acssm = AbstractContextualSolverSingleModel(
    model_type=BayesLinReg,
    num_arms=3,
    num_context=3,
    model_params={'alpha': 1, 'beta': 1},
)

In [None]:
# 2 * num_arms + 1 = 7 encodings: leave-one-out, one-hot, then the all-ones vector.
acssm.arms

OrderedDict([(0, array([0., 1., 1.])),
             (1, array([1., 0., 1.])),
             (2, array([1., 1., 0.])),
             (3, array([1., 0., 0.])),
             (4, array([0., 1., 0.])),
             (5, array([0., 0., 1.])),
             (6, array([1., 1., 1.]))])

In [None]:
# Import only the entry point; `import *` would flood the notebook namespace.
from nbdev.export import notebook2script

# Convert the project's notebooks into library modules.
notebook2script()

Converted 00_abstractions.ipynb.
Converted 01_multi_armed_bandits.ipynb.
Converted 02_models.ipynb.
Converted 03_ensembles.ipynb.
Converted 04_solvers.ipynb.
Converted 05_bayesian_regression.ipynb.
Converted 06_network_with_thompson_attention.ipynb.
Converted 99_helpers.ipynb.
Converted contextual_bandits.ipynb.
Converted index.ipynb.
Converted linear_problem_setup.ipynb.
Converted noncontextual_bandits.ipynb.
Converted nonlinear_bandits.ipynb.


In [None]:
# NumPy arrays are unhashable, so each encoding is converted to a tuple before
# it goes into a set; comparing lengths avoids relying on set ordering.
arm_encodings = [tuple(encoding) for encoding in acssm.arms.values()]
assert len(set(arm_encodings)) == len(arm_encodings)

In [None]:
# Tuples are hashable, unlike the arrays themselves, so they can go into a set.
list_of_vals = list(map(tuple, acssm.arms.values()))

In [None]:
# True when no two arms share the same encoding.
len(set(list_of_vals)) == len(list_of_vals)

In [None]:
# Arm encodings as tuples, in the order of acssm.arms.
list_of_vals

In [None]:
# Distinct encodings only; a set carries no ordering guarantee.
set(list_of_vals)

In [None]:
from collections import OrderedDict

# Earlier encoding experiment: num_arms + 1 all-ones vectors, each with one
# entry cleared. The index wraps around, so the last key clears index 0 again.
num_arms = 3
zero_arm = np.ones(num_arms)  # NOTE: despite its name this is the all-ones vector
arms = OrderedDict({x: np.ones(num_arms) for x in range(num_arms + 1)})
for x in arms:
    # Use num_arms instead of a hard-coded 3 so the cell follows the setting above.
    arms[x][x % num_arms] -= 1

In [None]:
# The last key wraps around to index 0, so it repeats the encoding of key 0.
arms