In [1]:
import random
import numpy as np
from collections import Counter

In [279]:
class LinUCB():
    '''
        Disjoint LinUCB contextual bandit.

        Every arm keeps its own ridge-regression statistics (A_a, b_a). An arm is scored by
        its predicted reward plus alpha times the width of the confidence interval, and the
        arm with the highest score (upper confidence bound) is played.
    '''

    def __init__(self, arm_ids, alpha, ctxtVecLen):
        '''
            arm_ids    : Iterable of hashable arm identifiers; one independent model is kept per arm

            alpha      : Controls exploration;
                         Higher the alpha, wider the confidence interval and higher the chance of trying any given arm

            ctxtVecLen : Length of the context (feature) vector
        '''

        self.arm_ids    = arm_ids
        self.kArms      = len(arm_ids)
        self.alpha      = alpha
        self.ctxtVecLen = ctxtVecLen

        # A: (ctxtVecLen x ctxtVecLen) matrix = D_a.T * D_a + I_d  (D_a holds the contexts seen by arm a).
        # Starting from the identity is the ridge penalty, which keeps A invertible from the start.
        self.A_a = {arm_id: np.identity(ctxtVecLen) for arm_id in arm_ids}

        # b: (ctxtVecLen x 1) response vector = D_a.T * c_a  (c_a holds the rewards seen by arm a).
        self.b_a = {arm_id: np.zeros([ctxtVecLen, 1]) for arm_id in arm_ids}

    def __repr__(self):
        # Read the instance's own attributes; bare names would resolve to notebook globals.
        return f'LinUCB Object with {self.kArms} arms and with alpha {self.alpha}'

    def _as_column(self, ctxtVec):
        '''
            Returns ctxtVec as a (ctxtVecLen x 1) float column vector.

            Raises ValueError when ctxtVec does not hold exactly ctxtVecLen values.
        '''
        column = np.asarray(ctxtVec, dtype=float).reshape([-1, 1])
        if column.shape[0] != self.ctxtVecLen:
            raise ValueError(
                f'Expected a context vector of length {self.ctxtVecLen}, got {column.shape[0]}'
            )
        return column

    def select_arm(self, ctxtVec, random_policy=False):
        '''
            Returns the id of the next arm to pull.

            ctxtVec       : Context vector with ctxtVecLen entries
            random_policy : When truthy, ignore the model and return a uniformly random arm id

            Raises ValueError when ctxtVec has the wrong length.
        '''

        # A random policy needs no scoring, so skip the per-arm matrix inversions entirely.
        if random_policy:
            return np.random.choice(self.arm_ids)

        ctxtVec = self._as_column(ctxtVec)

        ucb = []
        # Visit the arms in random order: max() keeps the first maximum, so ties between
        # equally scored arms are broken at random instead of always favouring the first arm.
        for arm_id in np.random.permutation(list(self.A_a.keys())):
            # Find A inverse for ridge regression
            A_inv = np.linalg.inv(self.A_a[arm_id])

            # Ridge regression estimate of the coefficients; theta is (ctxtVecLen x 1)
            theta = np.dot(A_inv, self.b_a[arm_id])

            expected_reward = np.dot(theta.T, ctxtVec)
            confidence_width = np.sqrt(np.dot(ctxtVec.T, np.dot(A_inv, ctxtVec)))
            ucb.append((arm_id, (expected_reward + self.alpha * confidence_width)[0][0]))

        return max(ucb, key=lambda scored: scored[1])[0]

    def update(self, ctxtVec, chosen_arm, reward):
        '''
            After we pull an arm, we get a reward signal back from our system. This function updates the
            algorithm's beliefs about the quality of the arm we just chose using that reward.

            ctxtVec    : Context vector that was shown when the arm was chosen
            chosen_arm : The id of the most recently chosen arm (must be one of arm_ids)
            reward     : The scalar reward received from choosing that arm

            Raises ValueError when ctxtVec has the wrong length and KeyError for an unknown arm id.
        '''

        ctxtVec = self._as_column(ctxtVec)

        # Update A which is (d x d) matrix.
        self.A_a[chosen_arm] += np.dot(ctxtVec, ctxtVec.T)

        # Update b which is (d x 1) vector; reward is scalar
        self.b_a[chosen_arm] += reward * ctxtVec


In [None]:
# Replay evaluation of LinUCB on logged data: for every alpha, stream the log and keep only
# the records where the policy picks the same arm that was logged. Those records are the only
# ones whose reward is known, so they alone update the model and count towards the estimate.

# Number of arms. This was previously an undefined notebook global; 10 matches the recorded
# run ("LinUCB Object with 10 arms") -- TODO confirm against the arm ids in dataset.txt.
kArms = 10
ctxtVecLen = 100  # number of context features expected per record
arms_ids = list(range(1, kArms + 1))

estimated_rewards_alphas = {}
# alpha is the scalar factor on the confidence interval (higher the value, higher the exploration)
for alpha in [0, 0.25, 0.5, 0.75, 1.0, 1.25, 1.5]:
    LinUCBPolicy = LinUCB(arms_ids, alpha, ctxtVecLen)
    print(LinUCBPolicy)

    armSelCntr = Counter()
    armRewCntr = Counter()

    with open('./dataset.txt') as fp:
        for eachRecord in fp:
            # Record layout: <arm> <reward> <feature_1> ... <feature_n>, whitespace separated
            fields = eachRecord.split()
            if not fields:
                continue  # tolerate blank lines (e.g. a trailing newline at end of file)

            data_arm = int(fields[0])
            data_reward = float(fields[1])
            data_x_array = np.array([float(eCov) for eCov in fields[2:]])
            selected_arm = LinUCBPolicy.select_arm(data_x_array)

            if selected_arm == data_arm:
                LinUCBPolicy.update(data_x_array, selected_arm, data_reward)
                # Key the counters by the plain-int logged arm (equal to selected_arm here)
                # so the printed counters are not cluttered with numpy scalar reprs.
                armSelCntr[data_arm] += 1
                armRewCntr[data_arm] += data_reward

    # Mean reward over matched records; NaN instead of ZeroDivisionError when nothing matched.
    matched_records = sum(armSelCntr.values())
    if matched_records:
        mean_reward = sum(armRewCntr.values()) / matched_records
    else:
        mean_reward = float('nan')

    print('Total Estimated Reward:\t', mean_reward)
    print('armSelCntr: ', armSelCntr)
    print('armRewCntr: ', armRewCntr)
    print('\n\n\n')

    estimated_rewards_alphas[alpha] = mean_reward

LinUCB Object with 10 arms and with alpha 0


In [278]:
# NOTE(review): leftover debugging probe. `ucb` is only ever bound as a local variable inside
# LinUCB.select_arm, so evaluating it at notebook scope raises the NameError recorded below.
ucb

NameError: name 'ucb' is not defined