In [None]:
%matplotlib inline  
import matplotlib.pyplot as plt
import numpy 
import random
import math
from scipy.optimize import linprog
import copy
import statistics
import itertools

In [None]:
#Input: K: Number of dimensions
#       N: Approximation Parameter ('N' in the paper)
#       R: Vector-valued loss matrix, given as a list (rows) of lists (columns) of lists (vectors)
#       beta: Discount factor
#       rounds: Number of iterations ('n' in the paper)

#Output: V: List of H(K,N) points on the approximate Pareto frontier obtained after scaling losses in R so that the maximum absolute value of per-stage loss is (1-beta) on each dimension 
#        pol: List of H(K,N) distributions over actions (immediate randomized policy 'alpha' associated with each of the H(K,N) modes) 
#        trans: A list of H(K,N) lists. Each list is associated with a mode.
#               There are m entries in each list corresponding to m adversary actions. 
#               Each entry is a list of at most K tuples (a,b) where a is a mode and b is a probability of transitioning into that mode.                
#        modes: This is the list of H(K,N) modes. (This is included just for reference and is not needed in the implementation of the strategy)

from scipy.optimize import linprog
import copy

def ParetoFrontier(K,N,R,beta,rounds):
    """Run value iteration over a grid of modes, solving one LP per mode per round.

    Args:
        K: Number of loss dimensions (length of every vector in ``R``).
        N: Approximation parameter. Mode coordinates lie on the grid
            ``{0, 1/N, ..., 1}`` and every mode has at least one zero coordinate.
        R: Vector-valued loss matrix. ``R[i][j]`` is the length-``K`` loss vector
            for row ``i`` and column ``j``; every row must have the same number
            of columns.
        beta: Discount factor.
        rounds: Number of value-iteration sweeps.

    Returns:
        The list ``[V, pol, trans, modes]``:

        * ``V``: one length-``K`` point per mode, computed on the losses after
          they were rescaled so the largest absolute entry is ``1 - beta``.
        * ``pol``: one distribution over the rows of ``R`` per mode.
        * ``trans``: one list per mode with one entry per column of ``R``; each
          entry is a list of ``(next_mode_index, probability)`` tuples holding
          the strictly positive transition weights.
        * ``modes``: the grid of modes, with the all-zero mode first.

        When ``rounds`` is 0, ``V`` is all zeros and ``pol``/``trans`` are empty.

    Raises:
        RuntimeError: if the LP solver returns no solution vector.
    """
    # Scale the losses so that the largest absolute entry becomes (1 - beta).
    ma = max(abs(x) for row in R for vec in row for x in vec)
    factor = (1-beta)/ma
    R = [[[x*factor for x in vec] for vec in row] for row in R]
    el = len(R)       # number of rows (actions the policy randomises over)
    m = len(R[0])     # number of columns (adversary actions); R[0] also works for a single row

    # Build the modes: grid points with one coordinate pinned to 0, zero vector first.
    zero = [0 for i in range(K)]
    grid = [i/N for i in range(N+1)]
    modes = [zero]
    for i in range(K):
        for base in itertools.product(grid, repeat=K-1):
            ent = list(base)
            ent.insert(i, 0.0)
            if ent != zero:
                modes.append(ent)
    n_modes = len(modes)

    # LP variables: [shift, alpha_0..alpha_{el-1}, z_{0,0}..z_{m-1,n_modes-1}].
    # Everything that does not depend on the current mode is built once.
    c = [1] + [0]*(el + n_modes*m)                      # minimise the shift
    minus_shift = [[-1]*(K*m)]
    Alph = [[x for j in range(m) for x in R[i][j]] for i in range(el)]
    Aeq = [[0] + [1]*el + [0]*(n_modes*m)]              # policy sums to 1
    for jj in range(m):
        # Transition weights for column jj sum to 1.
        Aeq.append([0]*(el+1) + [0]*(jj*n_modes) + [1]*n_modes + [0]*((m-1-jj)*n_modes))
    beq = [1] + [1]*m
    bnds = [(None,None)] + [(0,None)]*(len(c)-1)

    # Value iteration.
    V = [list(zero) for ent in modes]
    pol = []
    trans = []
    for n in range(rounds):
        Vtemp = [[es*beta for es in ent] for ent in V]
        # One block of K constraint rows per column of R; the discounted
        # continuation values sit in the block of the column they belong to.
        Big = []
        for col in range(m):
            pre = [0]*(col*K)
            post = [0]*((m-1-col)*K)
            for ent in Vtemp:
                Big.append(pre+ent+post)
        Aub = numpy.transpose(minus_shift + Alph + Big)

        V = []
        pol = []
        trans = []
        for r in range(n_modes):
            bub = modes[r]*m    # the mode repeated once per column of R
            # NOTE(review): `tol` is kept from the original call; newer SciPy
            # solvers may report it as an unrecognised option -- confirm
            # against the installed version.
            sol = linprog(c, A_ub = Aub, b_ub = bub, A_eq = Aeq, b_eq = beq, bounds=bnds, options=dict(tol=10e-8))
            if sol.x is None:
                raise RuntimeError("linprog returned no solution for mode %d: %s" % (r, sol.message))
            x = sol.x.tolist()
            shift = x[0]
            V.append([ent+shift for ent in modes[r]])
            # Keep only strictly positive transition weights, grouped by column.
            tup = [[] for kk in range(m)]
            for e in range(el+1,len(x)):
                if x[e]>0:
                    col, nxt = divmod(e-el-1, n_modes)
                    tup[col].append((nxt,x[e]))
            trans.append(tup)
            pol.append(x[1:el+1])

    return [V,pol,trans,modes]

In [None]:
# Fine grid: with K = 2 and N = 201 ParetoFrontier builds 1 + 2*201 = 403 modes.
K, beta, N, rounds = 2, 0.9, 201, 44
R = [[[0, 1], [0, -1]],
     [[-1, 0], [1, 0]]]
V = ParetoFrontier(K, N, R, beta, rounds)

In [None]:
# Coarse grid: with K = 2 and N = 20 ParetoFrontier builds 1 + 2*20 = 41 modes.
K, beta, N, rounds = 2, 0.9, 20, 44
R = [[[0, 1], [0, -1]],
     [[-1, 0], [1, 0]]]
Vsimp = ParetoFrontier(K, N, R, beta, rounds)

In [None]:
#GPS
# Monte-Carlo estimate of the beta-discounted regret of the GPS rule against
# three two-expert loss sequences ("rand", "ola", "rev"). Each of the `repeats`
# runs draws fresh sequences of length N and appends one regret per sequence to
# regrand / regola / regrev.
N = 100
beta = 0.9
Perf = []
repeats = 10000
losses = [[1,0],[0,1]]
regrand = []
regola = []
regrev = []

sig = (1-math.sqrt(1-beta**2))/beta
param1 = 0.5
param2 = 0.5-math.sqrt(1-beta)
# discount[k] is the weight of round k in every discounted sum below.
discount = [beta**k for k in range(N)]

for rep in range(repeats):
    RandX, RandY = [], []
    olaX, olaY = [], []
    revX, revY = [], []

    for t in range(N):
        # "rand": a uniformly random column of `losses` each round.
        a = random.randint(0,1)
        RandX.append(losses[0][a])
        RandY.append(losses[1][a])

        # "ola": column 0 with probability 0.9**t on even rounds and
        # 0.9**(1/t) on odd rounds, otherwise column 1.
        p_col0 = (9/10)**t if t%2 == 0 else (9/10)**(1/t)
        # Scalar draw: int() on a size-1 array is deprecated in recent NumPy.
        b = 1-int(numpy.random.binomial(1,p_col0))
        olaX.append(losses[0][b])
        olaY.append(losses[1][b])

        # "rev": the two experts' losses are independent Bernoulli draws.
        revX.append(int(numpy.random.binomial(1, param2)))
        revY.append(int(numpy.random.binomial(1, param1)))

    for xs, ys, regrets in ((RandX, RandY, regrand),
                            (revX, revY, regrev),
                            (olaX, olaY, regola)):
        cum = [0, 0]     # undiscounted cumulative loss of each expert before round t
        played = []      # loss incurred by the strategy in each round
        for t in range(N):
            # Experts ordered by cumulative loss; ties keep expert 0 first.
            order = [0, 1] if cum[0] <= cum[1] else [1, 0]
            gap = cum[order[1]] - cum[order[0]]
            # Follow the current leader with probability 1 - sig**gap / 2.
            dist = [1-(sig**gap)/2, (sig**gap)/2]
            out = order[numpy.random.choice(2, p=dist)]
            played.append(xs[t] if out == 0 else ys[t])
            # Running totals replace the original sum(x[:t]) recomputation.
            cum[0] += xs[t]
            cum[1] += ys[t]

        best = min(sum(l*w for l, w in zip(xs, discount)),
                   sum(l*w for l, w in zip(ys, discount)))
        regrets.append(sum(l*w for l, w in zip(played, discount)) - best)

In [None]:
#Our-403
# Monte-Carlo estimate of the beta-discounted regret of the mode-based strategy
# stored in V (policy V[1], transitions V[2]) against the three loss sequences
# "rand", "ola" and "rev". One regret per run is appended to
# ourregrand / ourregola / ourregrev.

N = 100
beta = 0.9
Perf = []
repeats = 10000
losses = [[1,0],[0,1]]
ourregrand = []
ourregola = []
ourregrev = []

pol = V[1]
trans = V[2]
param1 = 0.5
param2 = 0.5-math.sqrt(1-beta)
# Column a of `losses` is the pair of expert losses when the adversary plays a.
columns = [[losses[0][a], losses[1][a]] for a in range(2)]
# discount[k] is the weight of round k in every discounted sum below.
discount = [beta**k for k in range(N)]

for rep in range(repeats):
    RandX, RandY = [], []
    olaX, olaY = [], []
    revX, revY = [], []

    for t in range(N):
        # "rand": a uniformly random column of `losses` each round.
        a = random.randint(0,1)
        RandX.append(losses[0][a])
        RandY.append(losses[1][a])

        # "ola": column 0 with probability 0.9**t on even rounds and
        # 0.9**(1/t) on odd rounds, otherwise column 1.
        p_col0 = (9/10)**t if t%2 == 0 else (9/10)**(1/t)
        # Scalar draw: int() on a size-1 array is deprecated in recent NumPy.
        b = 1-int(numpy.random.binomial(1,p_col0))
        olaX.append(losses[0][b])
        olaY.append(losses[1][b])

        # "rev": the two experts' losses are independent Bernoulli draws.
        revX.append(int(numpy.random.binomial(1, param2)))
        revY.append(int(numpy.random.binomial(1, param1)))

    for xs, ys, regrets in ((RandX, RandY, ourregrand),
                            (revX, revY, ourregrev),
                            (olaX, olaY, ourregola)):
        mode = 0
        played = []      # loss incurred by the strategy in each round
        for t in range(N):
            # Clip solver round-off below zero, then renormalise the policy.
            dist = [max(ent,0) for ent in pol[mode]]
            total = sum(dist)
            dist = [ent/total for ent in dist]
            out = numpy.random.choice(2, p=dist)
            played.append(xs[t] if out == 0 else ys[t])

            # The mode moves only when the observed loss pair is a column of
            # `losses`; "rev" can also produce [0,0] or [1,1], which leave it unchanged.
            outcome = [xs[t], ys[t]]
            if outcome in columns:
                act = columns.index(outcome)
                nextmodes = [ent[0] for ent in trans[mode][act]]
                transdist = [ent[1] for ent in trans[mode][act]]
                # The weights come from an LP solution and may miss 1 by more
                # than numpy.random.choice tolerates, so renormalise them too.
                norm = sum(transdist)
                transdist = [p/norm for p in transdist]
                mode = int(numpy.random.choice(nextmodes, p=transdist))

        best = min(sum(l*w for l, w in zip(xs, discount)),
                   sum(l*w for l, w in zip(ys, discount)))
        regrets.append(sum(l*w for l, w in zip(played, discount)) - best)
    

In [None]:
#ours 41
# Monte-Carlo estimate of the beta-discounted regret of the mode-based strategy
# stored in Vsimp (policy Vsimp[1], transitions Vsimp[2]) against the three loss
# sequences "rand", "ola" and "rev". One regret per run is appended to
# our2regrand / our2regola / our2regrev.
N = 100
beta = 0.9
Perf = []
repeats = 10000
losses = [[1,0],[0,1]]
our2regrand = []
our2regola = []
our2regrev = []

pol = Vsimp[1]
trans = Vsimp[2]
param1 = 0.5
param2 = 0.5-math.sqrt(1-beta)
# Column a of `losses` is the pair of expert losses when the adversary plays a.
columns = [[losses[0][a], losses[1][a]] for a in range(2)]
# discount[k] is the weight of round k in every discounted sum below.
discount = [beta**k for k in range(N)]

for rep in range(repeats):
    RandX, RandY = [], []
    olaX, olaY = [], []
    revX, revY = [], []

    for t in range(N):
        # "rand": a uniformly random column of `losses` each round.
        a = random.randint(0,1)
        RandX.append(losses[0][a])
        RandY.append(losses[1][a])

        # "ola": column 0 with probability 0.9**t on even rounds and
        # 0.9**(1/t) on odd rounds, otherwise column 1.
        p_col0 = (9/10)**t if t%2 == 0 else (9/10)**(1/t)
        # Scalar draw: int() on a size-1 array is deprecated in recent NumPy.
        b = 1-int(numpy.random.binomial(1,p_col0))
        olaX.append(losses[0][b])
        olaY.append(losses[1][b])

        # "rev": the two experts' losses are independent Bernoulli draws.
        revX.append(int(numpy.random.binomial(1, param2)))
        revY.append(int(numpy.random.binomial(1, param1)))

    for xs, ys, regrets in ((RandX, RandY, our2regrand),
                            (revX, revY, our2regrev),
                            (olaX, olaY, our2regola)):
        mode = 0
        played = []      # loss incurred by the strategy in each round
        for t in range(N):
            # Clip solver round-off below zero, then renormalise the policy.
            dist = [max(ent,0) for ent in pol[mode]]
            total = sum(dist)
            dist = [ent/total for ent in dist]
            out = numpy.random.choice(2, p=dist)
            played.append(xs[t] if out == 0 else ys[t])

            # The mode moves only when the observed loss pair is a column of
            # `losses`; "rev" can also produce [0,0] or [1,1], which leave it unchanged.
            outcome = [xs[t], ys[t]]
            if outcome in columns:
                act = columns.index(outcome)
                nextmodes = [ent[0] for ent in trans[mode][act]]
                transdist = [ent[1] for ent in trans[mode][act]]
                # The weights come from an LP solution and may miss 1 by more
                # than numpy.random.choice tolerates, so renormalise them too.
                norm = sum(transdist)
                transdist = [p/norm for p in transdist]
                mode = int(numpy.random.choice(nextmodes, p=transdist))

        best = min(sum(l*w for l, w in zip(xs, discount)),
                   sum(l*w for l, w in zip(ys, discount)))
        regrets.append(sum(l*w for l, w in zip(played, discount)) - best)
    

In [None]:
#Hedge
# Monte-Carlo estimate of the beta-discounted regret of an exponential-weights
# rule (weights exp(-eta * discounted cumulative loss)) against the three loss
# sequences "rand", "ola" and "rev". One regret per run is appended to
# expregrand / expregola / expregrev.
N = 100
beta = 0.9
K = 2
Perf = []
repeats = 10000
losses = [[1,0],[0,1]]
expregrand = []
expregola = []
expregrev = []

eta = math.sqrt(8*math.log(K)*(1-beta**2))
param1 = 0.5
param2 = 0.5 - math.sqrt(1-beta)
# discount[k] is the weight of round k in every discounted sum below.
discount = [beta**k for k in range(N)]

for rep in range(repeats):
    RandX, RandY = [], []
    olaX, olaY = [], []
    revX, revY = [], []

    for t in range(N):
        # "rand": a uniformly random column of `losses` each round.
        a = random.randint(0,1)
        RandX.append(losses[0][a])
        RandY.append(losses[1][a])

        # "ola": column 0 with probability 0.9**t on even rounds and
        # 0.9**(1/t) on odd rounds, otherwise column 1.
        p_col0 = (9/10)**t if t%2 == 0 else (9/10)**(1/t)
        # Scalar draw: int() on a size-1 array is deprecated in recent NumPy.
        b = 1-int(numpy.random.binomial(1,p_col0))
        olaX.append(losses[0][b])
        olaY.append(losses[1][b])

        # "rev": the two experts' losses are independent Bernoulli draws.
        revX.append(int(numpy.random.binomial(1, param2)))
        revY.append(int(numpy.random.binomial(1, param1)))

    for xs, ys, regrets in ((RandX, RandY, expregrand),
                            (olaX, olaY, expregola),
                            (revX, revY, expregrev)):
        # Discounted cumulative loss of each expert over rounds 0..t-1, kept as
        # running totals instead of being re-summed from scratch every round.
        discumX = 0
        discumY = 0
        played = []      # loss incurred by the strategy in each round
        for t in range(N):
            wX = math.exp(-eta*discumX)
            wY = math.exp(-eta*discumY)
            pX = wX/(wX + wY)
            # b == 0 (follow expert X) with probability pX.
            b = 1-int(numpy.random.binomial(1,pX))
            played.append(xs[t] if b == 0 else ys[t])
            discumX += xs[t]*discount[t]
            discumY += ys[t]*discount[t]

        best = min(sum(l*w for l, w in zip(xs, discount)),
                   sum(l*w for l, w in zip(ys, discount)))
        regrets.append(sum(l*w for l, w in zip(played, discount)) - best)
    

In [None]:
# Mean regret per strategy with a 1.96 * std / sqrt(n) error bar, one panel per
# loss sequence. List order everywhere is [rand, rev, ola], shown as
# Adversary A / B / C.
# The error-bar factor is computed from the actual sample size instead of the
# constant 0.0196, which is 1.96/sqrt(n) only for n == 10000.
gps = [sum(r)/len(r) for r in (regrand, regrev, regola)]
gpserr = [1.96*numpy.std(r)/math.sqrt(len(r)) for r in (regrand, regrev, regola)]
our = [sum(r)/len(r) for r in (ourregrand, ourregrev, ourregola)]
ourerr = [1.96*numpy.std(r)/math.sqrt(len(r)) for r in (ourregrand, ourregrev, ourregola)]
our2 = [sum(r)/len(r) for r in (our2regrand, our2regrev, our2regola)]
our2err = [1.96*numpy.std(r)/math.sqrt(len(r)) for r in (our2regrand, our2regrev, our2regola)]
exp = [sum(r)/len(r) for r in (expregrand, expregrev, expregola)]
experr = [1.96*numpy.std(r)/math.sqrt(len(r)) for r in (expregrand, expregrev, expregola)]

y = [1,2,3,4]
my_xticks = ['GPS', 'Hedge','403-mode','41-mode']
fig, axs = plt.subplots(nrows=1, ncols=3, sharex=True, figsize=(10,2.5))

for idx, title in enumerate(['Adversary A', 'Adversary B', 'Adversary C']):
    ax = axs[idx]
    if idx > 0:
        # As in the original, only the second and third panels cap the tick count.
        ax.locator_params(axis='y', nbins=6)
    # Fix the tick positions first, then label them: labelling before
    # set_xticks attaches the labels to whatever ticks the default locator picked.
    ax.set_xticks(y)
    ax.set_xticklabels(my_xticks)
    ax.errorbar(y,[gps[idx],exp[idx],our[idx],our2[idx]],
                yerr=[gpserr[idx],experr[idx],ourerr[idx],our2err[idx]],
                fmt ='o',color = 'k', elinewidth=0.5, ecolor='k',capsize=5)
    ax.set_title(title)

# Lay out after drawing so titles and tick labels are taken into account.
fig.tight_layout()
plt.savefig('beta-9-K-2.pdf')
plt.show()
