## Rock Paper Scissors

Notebook demonstrating the FPN approach to learning the equilibrium to a parametrized rock paper scissors game.
Samy Wu Fung, Howard Heaton, Qiuweil Li and Daniel McKenzie

In [9]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from FPN import FPN

In [10]:
class VINet(FPN):
    '''
        Test implementation of variational inequality net for learning to
        solve a VI over the probability simplex.
        
        WARNING: This code is, as yet, untested.
        
        Daniel McKenzie, April 20th 2021
        
    '''
    def __init__(self, action_dim, num_players, device,
                 s_hi=1.0, inf_dim=10):
        super().__init__()
        self._device = device
        self._lat_dim = action_dim*num_players
        self._inf_dim = inf_dim
        self._device = device
        
        # Layers
        self.fc_u = nn.Linear(lat_dim, lat_dim, bias=False)
        self.relu = nn.ReLU()
        
    def name(self):
        return 'VINet'
        
    def device(self):
        return self._device
    
    def lat_dim(self):
        return self._lat_dim
    
    def s_hi(self):
        return self._s_hi
    
    def project_to_simplex(self, u):
        """
           function handling the projection to simplex.
           From https://github.com/smatmo/ProjectionOntoSimplex

        """
        batch_size = u.shape[0]
        mu = torch.sort(u, descending=True)[0]
        cum_sum = torch.cumsum(mu, dim=1)
        # Don't actually need to track gradients in next step:
        j = torch.unsqueeze(torch.arange(1,self._lat_dim + 1,
                          dtype = mu.dtype, device = self._device),0)
        rho = torch.sum(j*mu - cum_sum + 1. > 0.0,dim=1, keepdim=True) - 1.
        rho = rho.long()
        sum_to_rho = cum_sum[torch.arange(batch_size), rho[:,0]]
        theta = (1 - torch.unsqueeze(sum_to_rho, -1))/(rho.type(sum_to_rho.dtype) + 1)
        w = torch.clamp(theta + u, min=0.0)
        return w
    
    def latent_space_forward(self, u, v):
        u = 0.99*self.relu(self.fc_u(u) + v)
        
        # Now do projection on to simplex
        
        w = self.project_to_simplex(u)
        
        return w

## Generating the data

The next few cells handle generating the data.

In [11]:
# Generating the training data
w_fixed = torch.randn(9) # hold fixed. Will determine dependence of payoff (P) on context (d)


In [12]:
# Generate Data d
def generate_d(n):
    # generate n random data
    d = torch.rand(n,3)
    return d

# Creates payoff matrix P given d
def create_Pmatrix(d, w=w_fixed):
    batch_size = d.shape[0] # number of samples
    P = torch.zeros(batch_size, d.shape[1], d.shape[1])

    d1 = d[:,0]; d2 = d[:,1]; d3 = d[:,2]
    P[:,0,1] = w[0]*d1 + w[1]*d2 + w[2]*d3
    P[:,0,2] = w[3]*d1 + w[4]*d2 + w[5]*d3
    P[:,1,0] = -w[0]*d1 - w[1]*d2 - w[2]*d3
    P[:,1,2] = w[6]*d1 + w[7]*d2 + w[8]*d3
    P[:,2,0] = -w[3]*d1 - w[4]*d2 - w[5]*d3
    P[:,2,1] = -w[6]*d1 - w[7]*d2 - w[8]*d3
    
    return P

In [13]:
def project_to_simplex(u, lat_dim, device):
        """
            function handling the projection to simplex.
        """
        
        batch_size = u.shape[0]
        mu = torch.sort(u, descending=True)[0]
        cum_sum = torch.cumsum(mu, dim=1)
        # Don't actually need to track gradients in next step:
        j = torch.unsqueeze(torch.arange(1,lat_dim + 1,
                           dtype = mu.dtype, device = device),0)
        rho = torch.sum(j*mu - cum_sum + 1. > 0.0,dim=1, keepdim=True) - 1.
        rho = rho.long()
        sum_to_rho = cum_sum[torch.arange(batch_size), rho[:,0]]
        theta = (1 - torch.unsqueeze(sum_to_rho, -1))/(rho.type(sum_to_rho.dtype) + 1)
        w = torch.clamp(theta + u, min=0.0)
        return w

In [18]:
# generates true data z given P and d
def generate_z(d, eps = 1e-4, max_iter=1000, verbosity=False):

    batch_size = d.shape[0]
    problem_dim = d.shape[1]
    z = torch.zeros(batch_size, 2*problem_dim)
    P = create_Pmatrix(d)
    print(P.shape)
    alpha = 1. # step size, can make this smaller if necessary.
  
    for j in range(max_iter):
        z_old = z.clone()
        player_1_grad = -torch.matmul(P,z[:,0:problem_dim])
        player_2_grad = torch.matmul(torch.transpose(P, 1, 2), z[:,problem_dim+1:])
        game_gradient = torch.cat(player_1_grad,player_2_grad)

        # Take step of projected fixed point iteration
        z = project_to_simplex(z - alpha*game_gradient)
        diff_norm = torch.norm(z_old - z)
        if verbosity ==True:
            print('iter = ', j, '\t |z_{k+1} - z_k| = ', diff_norm)

        if diff_norm < eps:
            return z

    return z

In [19]:
# Generate Data
n = 1000; n_test = 100
d_train = generate_d(n)
d_test  = generate_d(n_test)

train_z = generate_z(d_train)

torch.Size([1000, 3, 3])


RuntimeError: mat1 and mat2 shapes cannot be multiplied (3000x3 and 1000x3)