In [78]:
import tensorflow as tf
import numpy as np

def game_theory_mixed_strategy(p_table, steps=100, learning_rate=0.2):
    """Estimate the mixed-strategy equilibrium of a 2x2 two-player game.

    Uses the indifference principle: in a mixed equilibrium each player
    randomises so that the *opponent* gets the same expected payoff from
    both of the opponent's pure strategies.  Hence B's mix ``q`` is fitted
    on A's payoffs and A's mix ``p`` is fitted on B's payoffs, each by
    minimising the squared payoff difference with Adam.

    Args:
        p_table: nested sequence of shape (2, 2, 2), indexed as
            ``p_table[a_move][b_move][player]`` with a_move 0/1 = U/D,
            b_move 0/1 = L/R and player 0/1 = A/B.  Example layout::

                         B: L       B: R
                A: U    (1, 1)     (0, 0)
                A: D    (0, 0)     (1, 1)

            Integer entries are accepted; they are converted to float32.
        steps: number of optimisation steps to run (default 100).
        learning_rate: Adam learning rate (default 0.2).

    Returns:
        Tuple ``(p, q)``: ``p`` is the probability that A plays U and ``q``
        the probability that B plays L, read after the last update step.

    Raises:
        ValueError: if ``p_table`` does not have shape (2, 2, 2).

    Note:
        ``p`` and ``q`` are sigmoids of unconstrained variables, so they
        stay strictly inside (0, 1); a game with no interior mixed
        equilibrium will not yield a meaningful answer.
    """
    # Converting up front validates the shape and avoids an int32/float32
    # mismatch in the multiplications below when integer payoffs are passed.
    payoffs = np.asarray(p_table, dtype=np.float32)
    if payoffs.shape != (2, 2, 2):
        raise ValueError(
            'p_table must have shape (2, 2, 2), got %s' % (payoffs.shape,))

    # Build in a private graph so repeated calls do not keep adding
    # variables and optimizers to the global default graph.
    graph = tf.Graph()
    with graph.as_default():
        payoff_table = tf.constant(payoffs)

        #### A's U strategy = p and A's D strategy = (1 - p)
        #### B's L strategy = q and B's R strategy = (1 - q)
        pv = tf.Variable(1., dtype=tf.float32)
        qv = tf.Variable(1., dtype=tf.float32)

        #### Squash p and q into the open interval (0, 1)
        p = tf.sigmoid(pv)
        q = tf.sigmoid(qv)

        a_strategy = tf.stack([p, 1. - p])
        b_strategy = tf.stack([q, 1. - q])

        # A's payoffs for U / D, one entry per move of B (L, R).
        a_u = payoff_table[0, :, 0]
        a_d = payoff_table[1, :, 0]
        # B's payoffs for L / R, one entry per move of A (U, D).
        b_l = payoff_table[:, 0, 1]
        b_r = payoff_table[:, 1, 1]

        # A's expected payoffs depend on B's mix: fit q so A is indifferent.
        a_u_result = tf.reduce_sum(tf.multiply(a_u, b_strategy))
        a_d_result = tf.reduce_sum(tf.multiply(a_d, b_strategy))
        a_loss = tf.square(a_u_result - a_d_result)
        q_opt = tf.train.AdamOptimizer(learning_rate).minimize(
            a_loss, var_list=[qv])

        # B's expected payoffs depend on A's mix: fit p so B is indifferent.
        b_l_result = tf.reduce_sum(tf.multiply(b_l, a_strategy))
        b_r_result = tf.reduce_sum(tf.multiply(b_r, a_strategy))
        b_loss = tf.square(b_l_result - b_r_result)
        p_opt = tf.train.AdamOptimizer(learning_rate).minimize(
            b_loss, var_list=[pv])

        init = tf.global_variables_initializer()

    with tf.Session(graph=graph) as sess:
        sess.run(init)
        print(sess.run(payoff_table))
        # The two updates touch disjoint variables, so they can share a run.
        for _ in range(steps):
            sess.run([p_opt, q_opt])
        # Read the probabilities in a separate run: fetching them together
        # with the train ops leaves it unspecified whether the value is
        # taken before or after the update.
        _p, _q = sess.run([p, q])
    return _p, _q
        
if __name__ == '__main__':
    # Demo on a zero-sum 2x2 game: every cell's two payoffs sum to zero.
    # The outer list has one entry per row of the table, and each cell
    # holds a pair of payoffs.
    first_row = [[1., -1.], [-1., 1.]]
    second_row = [[-1., 1.], [1., -1.]]
    p, q = game_theory_mixed_strategy([first_row, second_row])

    # Report each returned probability next to its complement.
    mixes = [[prob, 1-prob] for prob in (p, q)]
    print('Strategy: ', mixes)

[[[ 1. -1.]
  [-1.  1.]]

 [[-1.  1.]
  [ 1. -1.]]]
Strategy:  [[0.50038111, 0.49961888790130615], [0.50038111, 0.49961888790130615]]
