In [None]:
%matplotlib inline  
import matplotlib.pyplot as plt
import numpy 
import random
import math
from scipy.optimize import linprog
import copy
import statistics
import itertools

In [None]:
#Input: K: Number of dimensions
#       N: Approximation Parameter ('N' in the paper)
#       R: Vector-valued loss matrix input as a list (rows) of list (columns) of lists (vectors)
#       beta: Discount factor
#       rounds: Number of iterations ('n' in the paper)

#Output: V: List of H(K,N) points on the approximate Pareto frontier obtained after scaling losses in R so that the maximum absolute value of per-stage loss is (1-beta) on each dimension 
#        pol: List of H(K,N) distributions over actions (immediate randomized policy 'alpha' associated with each of the H(K,N) modes) 
#        trans: A list of H(K,N) lists. Each list is associated with a mode.
#               There are m entries in each list corresponding to m adversary actions. 
#               Each entry is a list of at most K tuples (a,b) where a is a mode and b is a probability of transitioning into that mode.                
#        modes: This is the list of H(K,N) modes. (This is included just for reference and is not needed in the implementation of the strategy)

from scipy.optimize import linprog
import copy

def ParetoFrontier(K,N,R,beta,rounds):
    """Approximate the Pareto frontier of a discounted vector-loss game by value iteration.

    Parameters
    ----------
    K : int
        Number of loss dimensions (length of every loss vector).
    N : int
        Grid resolution: each non-zero coordinate of a mode is a multiple of 1/N in [0, 1].
    R : list of list of list
        R[i][j] is the length-K loss vector for learner action i and adversary action j.
    beta : float
        Discount factor.
    rounds : int
        Number of value-iteration sweeps.

    Returns
    -------
    list
        [V, pol, trans, modes] where
        * modes is the list of grid points (the all-zero point first),
        * V[r] is modes[r] shifted by the optimal LP value t of mode r,
        * pol[r] is the learner's action distribution in mode r,
        * trans[r][j] is a list of (next_mode, probability) pairs with positive
          probability for adversary action j.
        When rounds == 0 no LP is solved: V holds zero vectors and pol/trans are empty.

    Raises
    ------
    RuntimeError
        If linprog returns no solution vector for some mode.
    """
    # Scale the losses so that the largest absolute entry becomes (1-beta).
    ma = max(abs(x) for row in R for vec in row for x in vec)
    factor = (1-beta)/ma
    R = [[[x*factor for x in vec] for vec in row] for row in R]
    el = len(R)       # number of learner actions
    m = len(R[0])     # number of adversary actions (every row is expected to have the same length)

    # Build the modes: grid points in [0,1]^K with a zero forced into coordinate i, for each i.
    # NOTE(review): a point with two or more zero coordinates is generated once per zero
    # position; only the all-zero point is de-duplicated, so other such points repeat.
    zero = [0 for _ in range(K)]
    grid = [i/N for i in range(N+1)]
    basegrid = [list(pt) for pt in itertools.product(grid, repeat=K-1)]
    modes = [zero]
    for i in range(K):
        for pt in basegrid:
            cand = pt[:i] + [0.0] + pt[i:]
            if cand != zero:
                modes.append(cand)
    n_modes = len(modes)
    n_trans = n_modes*m

    # LP variables: [t, alpha_0..alpha_{el-1}, z_{j,mode} for j in range(m), mode in range(n_modes)].
    # Everything below is independent of the round and of the mode, so it is built once.
    c = [1] + [0]*(el+n_trans)                      # minimise t
    loss_rows = [[x for vec in row for x in vec] for row in R]
    Aeq = [[0] + [1]*el + [0]*n_trans]              # alpha sums to 1
    for j in range(m):                              # z_{j,.} sums to 1 for every adversary action j
        Aeq.append([0]*(el+1) + [0]*(j*n_modes) + [1]*n_modes + [0]*((m-1-j)*n_modes))
    beq = [1]*(m+1)
    bnds = [(None,None)] + [(0,None)]*(len(c)-1)    # t is free, all probabilities are >= 0

    # Value iteration. Fresh copies so the returned V never aliases modes[0].
    V = [list(zero) for _ in modes]
    pol = []
    trans = []
    for n in range(rounds):
        Vtemp = [[v*beta for v in ent] for ent in V]
        # Continuation block: the row for (j, mode) carries beta*V[mode] in the K columns of action j.
        cont_rows = []
        for j in range(m):
            pre = [0]*(j*K)
            post = [0]*((m-1-j)*K)
            for ent in Vtemp:
                cont_rows.append(pre+ent+post)
        # One inequality per (adversary action j, dimension k):
        #   -t + sum_i alpha_i R[i][j][k] + sum_mode z_{j,mode} Vtemp[mode][k] <= modes[r][k]
        # The matrix only changes between rounds; the right-hand side changes per mode.
        Aub = numpy.transpose([[-1]*(K*m)] + loss_rows + cont_rows)
        V = []
        pol = []
        trans = []
        for r in range(n_modes):
            bub = modes[r]*m
            # 10e-8 equals 1e-7; whether the option is honoured depends on the SciPy solver in use.
            sol = linprog(c, A_ub = Aub, b_ub = bub, A_eq = Aeq, b_eq = beq, bounds=bnds, options=dict(tol=10e-8))
            if sol.x is None:
                raise RuntimeError("linprog returned no solution for mode {} in round {}: {}".format(r, n, sol.message))
            x = sol.x.tolist()
            t = x[0]
            V.append([ent+t for ent in modes[r]])
            tup = [[] for _ in range(m)]
            for e in range(el+1, len(x)):
                if x[e] > 0:
                    idx = e-el-1
                    tup[idx//n_modes].append((idx % n_modes, x[e]))
            trans.append(tup)
            pol.append(x[1:el+1])

    return [V,pol,trans,modes]

In [None]:
# Problem instance used by the experiments below.
K, N = 3, 20               # loss dimensions, grid resolution
beta, rounds = 0.9, 20     # discount factor, value-iteration sweeps

# R[row][column] is a K-dimensional loss vector (rows and columns as described
# in the ParetoFrontier header comment).
R = [
    [[0, 0, -1], [0, -1, 0], [0, 1, 1], [0, -1, -1], [0, 0, 1], [0, 1, 0]],
    [[0, 0, -1], [1, 0, 1], [-1, 0, 0], [1, 0, 0], [0, 0, 1], [-1, 0, -1]],
    [[1, 1, 0], [0, -1, 0], [-1, 0, 0], [1, 0, 0], [-1, -1, 0], [0, 1, 0]],
]

# V receives the whole return value of ParetoFrontier: [V, pol, trans, modes].
V = ParetoFrontier(K, N, R, beta, rounds)

In [None]:
#GPS
# Monte-Carlo estimate of the beta-discounted regret of the GPS strategy against
# three loss sequences: uniformly random loss columns ("Rand"), the "gps"
# adversary, and independent Bernoulli losses ("rev").
N = 100
beta = 0.9
Perf = []  # not read anywhere in this cell; kept so the cell still defines the name
repeats = 10000
losses = [[0,0,1,0,1,1],[0,1,0,1,1,0],[1,0,0,1,0,1]]
regrand = []
reggps = []
regrev = []


def _gps_play(expert_losses, sig):
    """Play the GPS strategy over one loss sequence.

    expert_losses: three equal-length lists (one per expert) of per-round losses.
    sig: base of the geometric weights in the sampling distribution.
    Returns the list of losses incurred by the expert sampled in each round.
    """
    cum = [0, 0, 0]  # undiscounted cumulative loss of each expert before the current round
    incurred = []
    for t in range(len(expert_losses[0])):
        # Experts ranked from smallest to largest cumulative loss (ties keep index order).
        order = sorted(range(len(cum)), key=lambda k: cum[k])
        d12 = cum[order[1]] - cum[order[0]]
        d13 = cum[order[2]] - cum[order[0]]
        d23 = cum[order[2]] - cum[order[1]]
        near = (sig**d12)/2
        far = sig**(d13+d23)
        # Probabilities of following the best, second and third ranked expert; they sum to 1.
        dist = [1 - near - far/6, near - far/6, far/3]
        pick = order[numpy.random.choice(3,1,p=dist)[0]]
        incurred.append(expert_losses[pick][t])
        # Reveal this round's losses only after the decision has been made.
        for k in range(len(cum)):
            cum[k] += expert_losses[k][t]
    return incurred


sig = (1-math.sqrt(1-beta**2))/beta
param1 = 0.5
param2 = 0.5-math.sqrt(1-beta)

for i in range(repeats):
    RandX, RandY, RandZ = [], [], []
    gpsX, gpsY, gpsZ = [], [], []
    revX, revY, revZ = [], [], []
    gpscum = [0, 0, 0]  # running cumulative loss of each expert under the "gps" adversary

    for t in range(N):
        # "Rand": a uniformly random loss column.
        a = random.randint(0,5)
        RandX.append(losses[0][a])
        RandY.append(losses[1][a])
        RandZ.append(losses[2][a])

        # "gps": columns 3-5 while all cumulative losses are tied, columns 2-3 otherwise.
        if max(gpscum) == min(gpscum):
            a = random.randint(3,5)
        else:
            a = random.randint(2,3)
        gpsX.append(losses[0][a])
        gpsY.append(losses[1][a])
        gpsZ.append(losses[2][a])
        for k in range(3):
            gpscum[k] += losses[k][a]

        # "rev": independent Bernoulli losses; the first expert has the smaller mean.
        revX.append(numpy.random.binomial(1, param2, 1)[0])
        revY.append(numpy.random.binomial(1, param1, 1)[0])
        revZ.append(numpy.random.binomial(1, param1, 1)[0])

    gpslossrand = _gps_play([RandX, RandY, RandZ], sig)
    gpslossgps = _gps_play([gpsX, gpsY, gpsZ], sig)
    gpslossrev = _gps_play([revX, revY, revZ], sig)

    # Discounted regret: learner's discounted loss minus that of the best single expert.
    for regrets, played, experts in (
        (regrand, gpslossrand, (RandX, RandY, RandZ)),
        (reggps, gpslossgps, (gpsX, gpsY, gpsZ)),
        (regrev, gpslossrev, (revX, revY, revZ)),
    ):
        best = min(sum(loss*beta**s for s, loss in enumerate(seq)) for seq in experts)
        regrets.append(sum(loss*beta**s for s, loss in enumerate(played)) - best)

In [None]:
#1321-mode
# Monte-Carlo estimate of the beta-discounted regret of the mode-based strategy
# (policy and transitions taken from V, the result of ParetoFrontier) against the
# "Rand", "gps" and "rev" loss sequences.
N = 100
beta = 0.9
Perf = []  # not read anywhere in this cell; kept so the cell still defines the name
repeats = 10000
losses = [[0,0,1,0,1,1],[0,1,0,1,1,0],[1,0,0,1,0,1]]
ourregrand = []
ourreggps = []
ourregrev = []

pol = V[1]
trans = V[2]

# Maps a loss column (one entry per expert) to its adversary-action index.
column_index = {tuple(losses[k][a] for k in range(len(losses))): a
                for a in range(len(losses[0]))}


def _mode_play(expert_losses, pol, trans, column_index):
    """Play the mode-based strategy over one loss sequence, starting in mode 0.

    expert_losses: equal-length lists (one per expert) of per-round losses.
    pol: pol[mode] is the action distribution used in that mode.
    trans: trans[mode][act] is a list of (next_mode, probability) pairs.
    column_index: maps an observed loss column to the adversary-action index.
    Returns the list of losses incurred by the action sampled in each round.
    """
    mode = 0
    incurred = []
    for t in range(len(expert_losses[0])):
        # Clip and renormalise: LP output can be slightly negative or not sum to exactly 1.
        dist = [max(p, 0) for p in pol[mode]]
        total = sum(dist)
        dist = [p/total for p in dist]
        out = numpy.random.choice(len(dist),1,p=dist)[0]
        incurred.append(expert_losses[out][t])

        observed = tuple(int(seq[t]) for seq in expert_losses)
        act = column_index.get(observed)
        if act is None:
            # The observed losses match none of the known columns
            # (e.g. all zeros or all ones): stay in the current mode.
            continue

        successors = trans[mode][act]
        nextmodes = [nxt for nxt, _ in successors]
        weights = [w for _, w in successors]
        # Renormalise for the same reason as the policy: numpy.random.choice
        # rejects probabilities whose sum is not 1 within its tolerance.
        wsum = sum(weights)
        transdist = [w/wsum for w in weights]
        mode = numpy.random.choice(nextmodes,1,p=transdist)[0]
    return incurred


param1 = 0.5
param2 = 0.5-math.sqrt(1-beta)

for i in range(repeats):
    RandX, RandY, RandZ = [], [], []
    gpsX, gpsY, gpsZ = [], [], []
    revX, revY, revZ = [], [], []
    gpscum = [0, 0, 0]  # running cumulative loss of each expert under the "gps" adversary

    for t in range(N):
        # "Rand": a uniformly random loss column.
        a = random.randint(0,5)
        RandX.append(losses[0][a])
        RandY.append(losses[1][a])
        RandZ.append(losses[2][a])

        # "gps": columns 3-5 while all cumulative losses are tied, columns 2-3 otherwise.
        if max(gpscum) == min(gpscum):
            a = random.randint(3,5)
        else:
            a = random.randint(2,3)
        gpsX.append(losses[0][a])
        gpsY.append(losses[1][a])
        gpsZ.append(losses[2][a])
        for k in range(3):
            gpscum[k] += losses[k][a]

        # "rev": independent Bernoulli losses; the first expert has the smaller mean.
        revX.append(numpy.random.binomial(1, param2, 1)[0])
        revY.append(numpy.random.binomial(1, param1, 1)[0])
        revZ.append(numpy.random.binomial(1, param1, 1)[0])

    ourlossrand = _mode_play([RandX, RandY, RandZ], pol, trans, column_index)
    ourlossgps = _mode_play([gpsX, gpsY, gpsZ], pol, trans, column_index)
    ourlossrev = _mode_play([revX, revY, revZ], pol, trans, column_index)

    # Discounted regret: learner's discounted loss minus that of the best single expert.
    for regrets, played, experts in (
        (ourregrand, ourlossrand, (RandX, RandY, RandZ)),
        (ourreggps, ourlossgps, (gpsX, gpsY, gpsZ)),
        (ourregrev, ourlossrev, (revX, revY, revZ)),
    ):
        best = min(sum(loss*beta**s for s, loss in enumerate(seq)) for seq in experts)
        regrets.append(sum(loss*beta**s for s, loss in enumerate(played)) - best)
    

In [None]:
#Hedge
# Monte-Carlo estimate of the beta-discounted regret of Hedge (exponential
# weights on the discounted cumulative losses) against the "Rand", "gps" and
# "rev" loss sequences.
N = 100
beta = 0.9
Perf = []  # not read anywhere in this cell; kept so the cell still defines the name
repeats = 10000
losses = [[0,0,1,0,1,1],[0,1,0,1,1,0],[1,0,0,1,0,1]]
expregrand = []
expreggps = []
expregrev = []


def _hedge_play(expert_losses, eta, beta):
    """Play Hedge over one loss sequence.

    expert_losses: equal-length lists (one per expert) of per-round losses.
    eta: learning rate of the exponential weights.
    beta: discount factor; the loss of round s enters the cumulative loss with weight beta**s.
    Returns the list of losses incurred by the expert sampled in each round.
    """
    discum = [0 for _ in expert_losses]  # discounted cumulative loss of each expert so far
    incurred = []
    for t in range(len(expert_losses[0])):
        weights = [math.exp(-eta*d) for d in discum]
        total = sum(weights)
        # Normalise every weight explicitly so no probability is obtained by
        # subtraction (which could round to a tiny negative number).
        probs = [w/total for w in weights]
        b = numpy.random.choice(len(probs),1,p=probs)[0]
        # Charge the loss of the expert that was actually drawn (index 2 is the third expert).
        incurred.append(expert_losses[b][t])
        for k, seq in enumerate(expert_losses):
            discum[k] += seq[t]*beta**t
    return incurred


# Learning rate; len(losses) is the number of experts.
eta = math.sqrt(8*math.log(len(losses))*(1-beta**2))
param1 = 0.5
param2 = 0.5-math.sqrt(1-beta)

for i in range(repeats):
    RandX, RandY, RandZ = [], [], []
    gpsX, gpsY, gpsZ = [], [], []
    revX, revY, revZ = [], [], []
    gpscum = [0, 0, 0]  # running cumulative loss of each expert under the "gps" adversary

    for t in range(N):
        # "Rand": a uniformly random loss column.
        a = random.randint(0,5)
        RandX.append(losses[0][a])
        RandY.append(losses[1][a])
        RandZ.append(losses[2][a])

        # "gps": columns 3-5 while all cumulative losses are tied, columns 2-3 otherwise.
        if max(gpscum) == min(gpscum):
            a = random.randint(3,5)
        else:
            a = random.randint(2,3)
        gpsX.append(losses[0][a])
        gpsY.append(losses[1][a])
        gpsZ.append(losses[2][a])
        for k in range(3):
            gpscum[k] += losses[k][a]

        # "rev": independent Bernoulli losses; the first expert has the smaller mean.
        revX.append(numpy.random.binomial(1, param2, 1)[0])
        revY.append(numpy.random.binomial(1, param1, 1)[0])
        revZ.append(numpy.random.binomial(1, param1, 1)[0])

    explossrand = _hedge_play([RandX, RandY, RandZ], eta, beta)
    explossgps = _hedge_play([gpsX, gpsY, gpsZ], eta, beta)
    explossrev = _hedge_play([revX, revY, revZ], eta, beta)

    # Discounted regret: learner's discounted loss minus that of the best single expert.
    for regrets, played, experts in (
        (expregrand, explossrand, (RandX, RandY, RandZ)),
        (expregrev, explossrev, (revX, revY, revZ)),
        (expreggps, explossgps, (gpsX, gpsY, gpsZ)),
    ):
        best = min(sum(loss*beta**s for s, loss in enumerate(seq)) for seq in experts)
        regrets.append(sum(loss*beta**s for s, loss in enumerate(played)) - best)
    

In [None]:
# Mean regret of each algorithm, ordered [Rand, rev, gps] by adversary.
gps = [sum(s)/len(s) for s in (regrand, regrev, reggps)]
our = [sum(s)/len(s) for s in (ourregrand, ourregrev, ourreggps)]
exp = [sum(s)/len(s) for s in (expregrand, expregrev, expreggps)]
# Error bars: 1.96 standard errors of the mean, computed from the actual sample
# count (for 10000 samples this is 0.0196*std).
gpserr = [1.96*numpy.std(s)/math.sqrt(len(s)) for s in (regrand, regrev, reggps)]
ourerr = [1.96*numpy.std(s)/math.sqrt(len(s)) for s in (ourregrand, ourregrev, ourreggps)]
experr = [1.96*numpy.std(s)/math.sqrt(len(s)) for s in (expregrand, expregrev, expreggps)]

y = [1,2,3]
my_xticks = ['GPS', 'Hedge','1321-mode']
fig, axs = plt.subplots(nrows=1, ncols=3, sharex=True, figsize=(10,2.5))

# (index into the result lists, panel title, number of y-axis bins or None for the default)
panels = [(0, 'Adversary D', None), (2, 'Adversary E', 6), (1, 'Adversary F', 6)]
for ax, (idx, title, nbins) in zip(axs, panels):
    # Fix the tick positions first, then label them, so labels and ticks always match.
    ax.set_xticks(y)
    ax.set_xticklabels(my_xticks)
    if nbins is not None:
        ax.locator_params(axis='y', nbins=nbins)
    ax.errorbar(y,[gps[idx],exp[idx],our[idx]],yerr=[gpserr[idx],experr[idx],ourerr[idx]], fmt ='o',color = 'k', elinewidth=0.5, ecolor='k',capsize=5)
    ax.set_title(title)

# Lay out after everything is drawn so titles and tick labels are accounted for.
fig.tight_layout()
plt.savefig('beta-9-K-3.pdf')
plt.show()


In [None]:
# V is the list [V, pol, trans, modes] returned by ParetoFrontier, so this shows
# the frontier point stored for the first mode (the all-zero mode).
V[0][0]

In [None]:
# Average of regrev, the per-repeat regrets collected in the GPS cell for the rev* loss sequences.
sum(regrev)/len(regrev)

In [None]:
# Scratch arithmetic (1323). NOTE(review): presumably 3 * 21**2, the raw number of
# grid points generated for K = 3, N = 20 before the repeated zero vector is dropped — confirm.
441*3

In [None]:
# Evaluates sqrt((1 - beta) * ln(K) / (2 * (1 + beta))) for the current beta and K.
# NOTE(review): looks like a closed-form regret bound used for comparison — confirm against the paper.
math.sqrt(((1-beta)*math.log(K))/(2*(1+beta)))