In [3]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [212]:
class UCB:
    def __init__(self, choice, upperBound, totalCost):
        self.processSpace = 10 # 10 state machine
        self.explorationFactor = 6
        self.decisionTime = 0 
        self.visitList = np.ones(self.processSpace) # count visits to each machines
        self.costMat = [] #store all the costs for every decisions
        self.totalCost = totalCost
        self.costAveList = np.zeros(self.processSpace)
        self.choice = choice
        self.upperBound = upperBound

    def costSimulation(self,candidates):
        costList = np.zeros(self.processSpace)
        for i in candidates:
            mean = i+1
            std = i+1
            costList[i] = (np.random.normal(mean,std,1))
        return costList
    
    def paramInit(self):
        initCostList = self.costSimulation(range(self.processSpace))
        self.choice = initCostList.argmax()
        print(self.choice)
        self.costMat.append(initCostList)
        self.costAveList = self.costMat[-1]
        self.upperBound = initCostList.max()
        self.visitList[self.choice] += 1
        self.decisionTime += 10
        self.totalCost += initCostList[self.choice]
        
    def paramUpdate(self):
        # candidates selection with sampling
        self.decisionTime += 1
        try:
            summedCost = np.sum(np.array(self.costMat),2)
        except:
            summedCost = self.costMat[0]
        self.costAveList = 1/self.visitList*summedCost
        sample = self.costAveList+self.upperBound*np.sqrt(self.explorationFactor*np.log(self.decisionTime)/self.visitList)
        candidates = np.argwhere(sample == np.amax(sample))
        candidates.flatten()
        # evaluating candidates
        costList = self.costSimulation(candidates)
        self.choice = costList.argmax()
        self.costMat.append(costList)
        self.totalCost += costList[self.choice]
        self.visitList[self.choice] += 1

In [213]:
ucbModel = UCB(0,0,0)
ucbModel.paramInit()
for i in range(1000):
    candidates = ucbModel.paramUpdate()
print(ucbModel.visitList)
print(ucbModel.totalCost)
# ucbModel.choice

6
[ 170.   93.   94.   93.   95.   91.   96.   94.   91.   94.]
6384.39466492


In [215]:
for i in range(10):
    print(np.sum(np.random.normal(10,10,1000)))

10181.7113198
10478.408504
10017.2432926
10201.2192534
9875.57900308
9600.60672277
9728.16453929
9909.49183212
9700.74508236
10339.3285941
