In [1]:
import sys, os 
import numpy as np
import math
import glob

from copy import deepcopy

import matplotlib.pyplot as plt
from itertools import product
import importlib

if importlib.util.find_spec('matplotlib'):
    import matplotlib
    import matplotlib.pyplot as plt
    from matplotlib.font_manager import FontProperties
    # matplotlib.font_manager._rebuild()

if importlib.util.find_spec('ipywidgets'):
    from ipywidgets.widgets import IntSlider
    from ipywidgets import interact

In [2]:
# Useful functions

# compute Value at Risk
def get_var(arr, alpha, atoms, tol=1e-9):
    """Return the Value at Risk (alpha-quantile) of a discrete distribution.

    The probability masses in ``arr`` are accumulated in order and the
    support point at which the running total first reaches ``alpha`` is
    returned.

    Args:
        arr: probability mass of each atom, in the same order as ``atoms``.
        alpha: level the cumulative mass has to reach.
        atoms: support points; ``atoms[j]`` carries the mass ``arr[j]``.
        tol: slack used when comparing the running total with ``alpha``.
            Summing floats can leave the total marginally short of the exact
            value (ten masses of 0.1 add up to 0.9999999999999999), which
            would otherwise make the search miss the last atom.

    Returns:
        The first atom whose cumulative mass is at least ``alpha - tol``, or
        ``0`` when the accumulated mass never gets there (including an empty
        ``arr``).
    """
    cum_p = 0
    for j, prob in enumerate(arr):
        cum_p += prob
        if cum_p >= alpha - tol:
            return atoms[j]
    return 0

# compute conditional value at Risk
def get_cvar(arr, alpha, atoms, var):
    """Return the Conditional Value at Risk of a discrete distribution.

    Computes ``var + E[max(0, X - var)] / (1 - alpha)``, where the
    expectation runs over the atoms that carry positive mass.

    Args:
        arr: probability mass of each atom, in the same order as ``atoms``.
        alpha: confidence level used to scale the expected excess.
        atoms: support points; ``atoms[i]`` carries the mass ``arr[i]``.
        var: the Value at Risk at level ``alpha`` (e.g. from ``get_var``).

    Returns:
        The CVaR value. For ``alpha == 1`` the scaling factor would divide by
        zero, so ``var`` is returned unchanged; that is what the formula
        yields whenever no mass lies above ``var``.
    """
    if alpha == 1:
        return var

    expected_excess = 0
    for i, prob in enumerate(arr):
        if prob > 0:
            # Only the part of the cost above var contributes to the tail.
            expected_excess += prob * max(0, atoms[i] - var)
    return var + 1 / (1 - alpha) * expected_excess


In [14]:
def value_iteration(sigma, trans_p, nstates=4, T=4, beta= 1, verbose=True):
    """Propagate the per-state cost distributions through T policy sweeps.

    In sweep ``i`` and state ``s`` the action ``sigma[i][s]`` (1-based) selects
    a list of transitions from ``trans_p``. For every transition
    ``(probability, next_state, cost)`` the current distribution of the
    successor state is moved to ``cost + beta * atom``, clipped to the range of
    the grid, and its mass is split linearly between the two neighbouring
    atoms. Each transition is combined with the distribution of its own
    successor, so a transition's cost stays paired with the state it leads to.
    All states are computed from the distributions of the previous sweep and
    written back together at the end of the sweep.

    Relies on the notebook globals ``natoms``, ``atoms``, ``delta_a`` and
    ``dist``. ``dist`` is overwritten in place and holds the result; nothing
    is returned. Assumes ``atoms`` is evenly spaced with step ``delta_a``.

    NOTE(review): sweep ``i`` uses ``sigma[i]``; if the sweeps are meant as a
    backward recursion, sweep 0 is the last decision epoch -- confirm that
    ``sigma`` is ordered accordingly.

    Args:
        sigma: policy table; ``sigma[i][s]`` is the 1-based action for state
            ``s`` in sweep ``i``.
        trans_p: ``trans_p[s][action]`` is a list of
            ``(probability, next_state, cost)`` with 1-based ``next_state``.
        nstates: number of states to update.
        T: number of sweeps.
        beta: discount applied to the successor's cost atoms.
        verbose: print the per-state and per-sweep diagnostics.
    """
    low, high = atoms[0], atoms[-1]
    last = natoms - 1
    next_dist = np.zeros([nstates, natoms])

    for i in range(T):
        for s in range(nstates):
            action = sigma[i][s]-1
            transitions = trans_p[s][action]
            m = np.zeros([natoms])

            if verbose:
                print(f"\n------------- state: {s} ---------------")
                # Probability-weighted mixture of the successor distributions;
                # computed for the diagnostic printout only.
                sum_p = np.zeros([natoms])
                for transition in transitions:
                    for a in range(natoms):
                        sum_p[a] += transition[0] * dist[(transition[1]-1)][a]
                print(sum_p)

            for a in range(natoms):
                for transition in transitions:
                    prob, nxt, step_cost = transition[:3]
                    # Mass that reaches atom a of this transition's successor.
                    mass = prob * dist[nxt - 1][a]

                    # support adjustment: keep the shifted atom on the grid
                    raw_cost = step_cost + beta*atoms[a]
                    cost = max(low, min(high, raw_cost))

                    # Fractional grid position, measured from the first atom and
                    # clamped so float noise cannot index past the last atom.
                    index = min(max((cost - low)/delta_a, 0), last)
                    l = math.floor(index); u = math.ceil(index)

                    if verbose:
                        print(round(index, 3),"atoms[a]:", atoms[a], "sum_p[a]:", sum_p[a], "cost:",raw_cost, "discount: ", beta)

                    if l != u:
                        # Split the mass between the two neighbouring atoms.
                        m[l] += mass * (u - index)
                        m[u] += mass * (index - l)
                    else:
                        m[l] += mass

            next_dist[s] = m

        # update distribution only after every state used the old values
        for s in range(nstates):
            dist[s] = next_dist[s].copy()

        if verbose:
            print(f"\n------------- i: {i} ---------------\n {dist}")


### Model 1

In [166]:
# Model 1: four states; only the first state offers a choice between two actions.
nstates = 4
actions = [(1, 2), 1, 1, 1]
# trans_p[state][action] -> list of (probability, next_state, cost);
# next_state is 1-based.
trans_p = [
    [
        [(0.5, 2, 0), (0.5, 4, 2)],
        [(1.0, 3, 0.5)],
    ],
    [[(1, 2, 0)]],
    [[(1, 3, 0)]],
    [[(1, 4, 0)]],
]

In [167]:
# Cost grid: natoms support points spaced delta_a apart, starting at 0.
natoms, delta_a = 16, 0.2
beta = 0.4  # discount factor
atoms = np.array([round(k * delta_a, 2) for k in range(natoms)])

In [173]:
c_n = 0
visited = []
# Policy table: sigma[t][s] is the 1-based action taken in state s at step t.
# sigma = [[1,1,1,1], [1,1,1,1], [1,1,1,1], [1,1,1,1]] # sigma 1
sigma = [[2, 1, 1, 1] for _ in range(4)]  # sigma 2
desired_cost = 0

In [174]:
# Start every state with all of its probability on the first atom.
dist = np.zeros([nstates, natoms])
dist[:, 0] = 1.0

dist

array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])

In [175]:
# Pass this model's settings explicitly; the function defaults (nstates=4, T=4,
# beta=1) would otherwise ignore the discount configured for the model.
value_iteration(sigma, trans_p, nstates=nstates, T=len(sigma), beta=beta)


------------- state: 0 ---------------
[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

------------- state: 1 ---------------
[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

------------- state: 2 ---------------
[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

------------- state: 3 ---------------
[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

------------- i: 0 ---------------
 [[0.  0.  0.5 0.5 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0. ]
 [1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0. ]
 [1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0. ]
 [1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0. ]]

------------- state: 0 ---------------
[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

------------- state: 1 ---------------
[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

------------- state: 2 ---------------
[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

------------- state: 3 ---------------
[1. 0. 0. 0. 0. 0. 0. 0.

In [176]:
# Value at Risk of the cost distribution of the first state (dist[0]) at level alpha.
alpha=0.5
print(f" VaR sigma {sigma}, alpha:{alpha}")
var=get_var(dist[0], alpha, atoms)
print(var)

 VaR sigma [[2, 1, 1, 1], [2, 1, 1, 1], [2, 1, 1, 1], [2, 1, 1, 1]], alpha:0.5
0.4


In [177]:
# Conditional Value at Risk of the first state's distribution; alpha and var
# are not set here and must come from a previously executed cell.
print(f" CvaR sigma {sigma}, alpha:{alpha}")
print(get_cvar(dist[0], alpha, atoms, var))

 CvaR sigma [[2, 1, 1, 1], [2, 1, 1, 1], [2, 1, 1, 1], [2, 1, 1, 1]], alpha:0.5
0.6


In [117]:
# Forward pass from the first state under policy sigma: c[i][k] accumulates the
# probability that the discounted cost incurred at step i lands on atom k.
# NOTE(review): T is not assigned in this cell; it has to exist in the kernel
# already -- confirm where it is set.
cur_s = [0]
cur_p = [1.0]
c =np.zeros([T,natoms])
for i in range(T):
    # Probability of each successor state, summed over every current state.
    next_prob = {}
    for s, p_s in zip(cur_s, cur_p):
        action = sigma[i][s]-1
        print(trans_p[s][action])
        for transition in trans_p[s][action]:
            cost = transition[2]*pow(beta,i)
            # Round before truncating so float noise (e.g. 2.9999999999999996)
            # does not drop the cost one bin down; clamp to stay on the grid.
            index = min(int(round(cost/delta_a, 2)), natoms-1)
            print(f"cost : {cost}, index :{cost/delta_a}, cur_p {p_s}")
            reach_p = transition[0] * p_s
            c[i][index] += reach_p
            nxt = transition[1]-1
            next_prob[nxt] = next_prob.get(nxt, 0.0) + reach_p
    # States and probabilities come from the same dict, so they stay aligned.
    cur_s = list(next_prob)
    cur_p = list(next_prob.values())
    news = set(cur_s)
    newp = cur_p
    print(newp)

print(c)
print(atoms)
print(news)

[(0.5, 2, 0), (0.5, 4, 2)]
cost : 0.0, index :0.0, cur_p 1.0
cost : 2.0, index :10.0, cur_p 1.0
[0.5, 0.5]
[[0.5 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.  0.  0.  0.  0. ]]
[0.0, 0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4, 1.6, 1.8, 2.0, 2.2, 2.4, 2.6, 2.8, 3.0]
{1, 3}


In [165]:
atoms

array([0. , 0.2, 0.4, 0.6, 0.8, 1. , 1.2, 1.4, 1.6, 1.8, 2. , 2.2, 2.4,
       2.6, 2.8, 3. ])

### Model 2

In [25]:
# Model 2: three states; only the first state offers a choice between two actions.
nstates = 3
actions = [(1, 2), 1, 1]
# trans_p[state][action] -> list of (probability, next_state, cost);
# next_state is 1-based.
trans_p = [
    [
        [(0.5, 2, 0), (0.5, 1, 0)],
        [(1.0, 3, 0.5)],
    ],
    [[(1, 2, 1)]],
    [[(1, 3, 0)]],
]

In [26]:
# Cost grid: natoms support points spaced delta_a apart, starting at 0.
natoms, delta_a = 16, 0.2
beta = 0.4  # discount factor
atoms = np.array([round(k * delta_a, 2) for k in range(natoms)])

In [27]:
# Start every state with all of its probability on the first atom.
dist = np.zeros([nstates, natoms])
dist[:, 0] = 1.0

dist

array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])

In [28]:
# Policy table: sigma[t][s] is the 1-based action taken in state s at step t.
# NOTE(review): the commented-out alternative below lists four actions per
# step, while this model is configured with three states -- confirm before use.
# sigma = [[1,1,1,1], [1,1,1,1]] # sigma 1
sigma = [[2,1,1], [1,1,1]] # sigma 2  
desired_cost= 0

In [29]:
# Take the discount and horizon from the model configuration instead of
# repeating them as literals, so the call follows any change to beta or sigma.
value_iteration(sigma, trans_p, beta=beta, nstates=nstates, T=len(sigma))


------------- state: 0 ---------------
[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
2.5 atoms[a]: 0.0 sum_p[a]: 1.0 cost: 0.5 discount:  0.4
2.9 atoms[a]: 0.2 sum_p[a]: 0.0 cost: 0.5800000000000001 discount:  0.4
3.3 atoms[a]: 0.4 sum_p[a]: 0.0 cost: 0.66 discount:  0.4
3.7 atoms[a]: 0.6 sum_p[a]: 0.0 cost: 0.74 discount:  0.4
4.1 atoms[a]: 0.8 sum_p[a]: 0.0 cost: 0.8200000000000001 discount:  0.4
4.5 atoms[a]: 1.0 sum_p[a]: 0.0 cost: 0.9 discount:  0.4
4.9 atoms[a]: 1.2 sum_p[a]: 0.0 cost: 0.98 discount:  0.4
5.3 atoms[a]: 1.4 sum_p[a]: 0.0 cost: 1.06 discount:  0.4
5.7 atoms[a]: 1.6 sum_p[a]: 0.0 cost: 1.1400000000000001 discount:  0.4
6.1 atoms[a]: 1.8 sum_p[a]: 0.0 cost: 1.2200000000000002 discount:  0.4
6.5 atoms[a]: 2.0 sum_p[a]: 0.0 cost: 1.3 discount:  0.4
6.9 atoms[a]: 2.2 sum_p[a]: 0.0 cost: 1.3800000000000001 discount:  0.4
7.3 atoms[a]: 2.4 sum_p[a]: 0.0 cost: 1.46 discount:  0.4
7.7 atoms[a]: 2.6 sum_p[a]: 0.0 cost: 1.54 discount:  0.4
8.1 atoms[a]: 2.8 sum_p[a]: 0.0

In [30]:
atoms

array([0. , 0.2, 0.4, 0.6, 0.8, 1. , 1.2, 1.4, 1.6, 1.8, 2. , 2.2, 2.4,
       2.6, 2.8, 3. ])

In [31]:
dist[0]

array([0.05, 0.4 , 0.55, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
       0.  , 0.  , 0.  , 0.  , 0.  ])

In [32]:
# Value at Risk of the cost distribution of the first state (dist[0]) at level alpha.
alpha=0.5
print(f" VaR sigma {sigma}, alpha:{alpha}")
var=get_var(dist[0], alpha, atoms)
print(var)

 VaR sigma [[2, 1, 1], [1, 1, 1]], alpha:0.5
0.4


In [33]:
# Conditional Value at Risk of the first state's distribution; alpha and var
# are not set here and must come from a previously executed cell.
print(f" CvaR sigma {sigma}, alpha:{alpha}")
print(get_cvar(dist[0], alpha, atoms, var))

 CvaR sigma [[2, 1, 1], [1, 1, 1]], alpha:0.5
0.4


In [66]:
# Forward pass from the first state under policy sigma: c[i][k] accumulates the
# probability that the discounted cost incurred at step i lands on atom k.
# NOTE(review): T is not assigned in this cell; it has to exist in the kernel
# already -- confirm where it is set.
cur_s = [0]
cur_p = [1.0]
c =np.zeros([T,natoms])
for i in range(T):
    # Probability of each successor state, summed over every current state.
    next_prob = {}
    for s, p_s in zip(cur_s, cur_p):
        action = sigma[i][s]-1
        print(trans_p[s][action])
        for transition in trans_p[s][action]:
            cost = transition[2]*pow(beta,i)
            # print(f"cost : {cost}, cur_p {p_s}")
            # Round before truncating so float noise (e.g. 2.9999999999999996)
            # does not drop the cost one bin down; clamp to stay on the grid.
            index = min(int(round(cost/delta_a, 2)), natoms-1)
            reach_p = transition[0] * p_s
            c[i][index] += reach_p
            nxt = transition[1]-1
            next_prob[nxt] = next_prob.get(nxt, 0.0) + reach_p
    # States and probabilities come from the same dict, so they stay aligned.
    cur_s = list(next_prob)
    cur_p = list(next_prob.values())
    news = set(cur_s)
    newp = cur_p
    # print(newp)

print(c)
print(atoms)
print(news)

[(0.5, 2, 0), (0.5, 1, 0)]
[(1.0, 2, 0.5)]
[(1, 1, 1)]
[[1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0. ]
 [0.  0.5 0.5 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0. ]]
[0.0, 0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4, 1.6, 1.8, 2.0, 2.2, 2.4, 2.6, 2.8, 3.0]
{0}
