In [None]:
import pickle as pkl
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D # <--- This is important for 3d plotting 
import seaborn as sns
import pandas as pd
from environment.TheoreticalMarket import *


In [None]:
# Path to the pickled results file analysed by the rest of this notebook.
filename = "sim_results/market-marl-nash-3-05-07-2022-00-17-5840_results.pkl"

# NOTE: unpickling can execute arbitrary code -- only load result files you trust.
with open(filename, 'rb') as results_file:
    res = pkl.load(results_file)

sns.set_theme(style="darkgrid")

# One column per loss series stored on the results object.
data_losses = {
    "Q Function Loss": res.losses,
    "Epsilon Function Loss": res.losses_eps,
    "Nash Net Loss": res.losses_nash,
}
data_losses_df = pd.DataFrame(data_losses)
# Alternative quick look at all three series on one axis:
# sns.lineplot(data=data_losses_df)


In [None]:
# Display the `marl_params` attribute of the loaded results object.
res.marl_params

In [None]:
# Q-function loss goes on the primary (left) y-axis.
ax = data_losses_df.plot(y="Q Function Loss", figsize=(8, 5), legend=False)

# The Nash-net loss gets its own y-axis so both curves keep a readable scale.
ax2 = ax.twinx()
data_losses_df.plot(y="Nash Net Loss", color="orange", legend=False, ax=ax2)

# Optional third series on the secondary axis:
# data_losses_df.plot(y="Epsilon Function Loss", ax=ax2, legend=False, color="red")

# Per-axes legends are disabled above; draw a single figure-level legend instead.
loss_fig = ax.figure
loss_fig.legend()


In [None]:
# import matplotlib.pyplot as plt
# sns.lineplot(data=res.losses, color="b", legend="auto")
# ax2 = plt.twinx()
# sns.lineplot(data=res.losses_eps, color="g", ax=ax2)

In [None]:
def moving_average(x, w=3):
    """Return the simple moving average of ``x`` over a window of ``w`` samples.

    Only fully overlapping windows are kept ('valid' convolution), so for
    ``len(x) >= w`` the result holds ``len(x) - w + 1`` values.
    """
    # Convolving with a vector of ones yields the running window sums.
    window_sums = np.convolve(x, np.ones(w), mode='valid')
    return window_sums / w

# Smoothed reward of one agent plotted next to the per-epoch agent reward series.
all_rewards = res.episode_rewards
agent_ind = 0

# Last entry along the second axis for the chosen agent
# (presumably the final step of every episode -- TODO confirm the axis layout).
agent_final_rewards = res.episode_rewards[:, -1, agent_ind]

# Keep the window explicit so the x-offset used below can never drift from it.
smooth_window = 3
smoothed_episode_rewards = moving_average(agent_final_rewards, w=smooth_window)

sns.set()
x = np.arange(len(all_rewards))

# The 'valid' moving average drops the first `smooth_window - 1` points,
# so the smoothed curve starts at that x position.
plt.plot(x[smooth_window - 1:], smoothed_episode_rewards, 'r', label='Rewards per agent')
plt.plot(x, res.avg_epoch_rewards_agent, 'b', label='Reward agent of interest')

# This figure shows rewards; the previous title ('Pricing actions and Nash
# Boundary') was carried over from the pricing plot.
plt.legend(title='Episode rewards')
plt.show()

In [None]:
# Average reward series stored on the results object, drawn as one line each.
data_rewards = {
    "Average Agent Reward": res.avg_epoch_rewards,
    "Average Reward for Agent 0": res.avg_epoch_rewards_agent,
}
data_rewards_df = pd.DataFrame(data_rewards)

# Widen the default seaborn figure before plotting.
sns.set(rc={'figure.figsize': (9, 5)})
sns.lineplot(data=data_rewards_df)

In [None]:


# Theoretical Data
# Coefficients passed to `get_nash_eps_curve` below. Exactly one configuration
# should be active; the others are kept commented out for reference.
# tm = TheoreticalMarket(beta0 = 30, beta1 = -1.1, beta2 = -2, a = 0.1, ref_p = 1.5)
# beta0 = 25
# beta1 = -0.9
# beta2 = -1.1
# a = 0.1

# Market 2
# beta0 = 25
# beta1 = -0.6
# beta2 = -6.1
# a = 0.1

# Market 3
beta0 = 15
beta1 = -1.05
beta2 = -3.1
a = 0.1

# Config 4
# beta0 = 13
# beta1 = -5.05
# beta2 = -2.1
# a = 0.2

# Config 5
# beta0 = 27
# beta1 = -3.05
# beta2 = -1.1
# a = 0.2


# Simulation Data
# Index of the run inspected in detail by the following cells.
ep = 31
# ref_prices = res.state_tracker_epoch[ep]
ref_prices = res.state_tracker[ep]

episode_rewards = res.episode_rewards[ep]

joint_actions = res.episode_actions
episode_joint_actions = joint_actions[ep]

# Evaluate the theoretical curve once per reference price and reuse the result
# for all three components (previously the same call was repeated three times).
# Assumes `get_nash_eps_curve` is deterministic for fixed arguments.
nash_eps_curves = [get_nash_eps_curve(r, beta0, beta1, beta2, a) for r in ref_prices]
prices = np.array([curve[0] for curve in nash_eps_curves])
devs = np.array([curve[1] for curve in nash_eps_curves])
demand = np.array([curve[2] for curve in nash_eps_curves])

In [None]:

# Grid handed to `get_eps0_range`: reference prices on one axis and the price
# grid of the first curve on the other (assumes every curve shares that grid --
# TODO confirm). X, Y and z are reused by later cells.
y = ref_prices
x = prices[0]
X, Y = np.meshgrid(x, y)
z = devs

# min_eps_price, max_eps_price = get_eps0_range(9.9, Y, X, z)

# One (lower, upper) pair per reference price of the selected run.
nash_eps_bounds = [get_eps0_range(ref_price, Y, X, z) for ref_price in ref_prices]
nash_lower_bound = [bounds[0] for bounds in nash_eps_bounds]
nash_upper_bound = [bounds[1] for bounds in nash_eps_bounds]

# Episode rewards / actions averaged over axis 1 (the agents).
avg_ag_rewards = episode_rewards.mean(axis=1)
avg_ag_actions = episode_joint_actions.mean(axis=1)

# NOTE(review): column 1 was previously plotted under the label 'Agent 0 Price'.
# The label now follows the index so the figure is never mislabelled; set
# `agent_of_interest = 0` if agent 0 was the intended series.
agent_of_interest = 1
agent_interest_action = episode_joint_actions[:, agent_of_interest]

sns.set()
x = np.arange(len(avg_ag_actions))

plt.plot(x, avg_ag_actions, '--', label='Average Agent Price')
# plt.plot(x, ref_prices, 'r--', label='Ref. Price')
# plt.plot(x, avg_ag_rewards, 'r--', label='Ref. Price')
plt.plot(x, agent_interest_action, 'o--', label=f'Agent {agent_of_interest} Price')

# Shaded band between the lower and upper bound for each step.
plt.fill_between(x, nash_upper_bound, nash_lower_bound, color='b', alpha=0.2)

plt.legend(title='Pricing actions and Nash Boundary')
plt.show()

In [None]:
# `shift` is part of the public `scipy.ndimage` namespace; the former
# `scipy.ndimage.interpolation` import path is deprecated.
from scipy.ndimage import shift

# Mean over the last axis of the final entry of every episode's joint actions.
computed_ref_price = np.mean(res.episode_actions[:, -1, :], axis=1)

# Shift by -1 so each position holds the following episode's value; the vacated
# last slot is filled with the final value via `cval`.
computed_avg_action = shift(computed_ref_price, -1, cval=computed_ref_price[-1])

# Rows of (computed_ref_price, computed_avg_action).
computed_ref_price_pair = np.array([computed_ref_price, computed_avg_action]).T


In [None]:
# Bounds from `get_eps0_range` for every computed reference price, looked up on
# the X / Y / z grid built earlier in the notebook.
nash_eps_bounds_all_ep = [
    get_eps0_range(episode_ref_price, Y, X, z)
    for episode_ref_price in computed_ref_price
]

# Split the (lower, upper) pairs into two parallel lists.
nash_lower_bound_all_ep = [bounds[0] for bounds in nash_eps_bounds_all_ep]
nash_upper_bound_all_ep = [bounds[1] for bounds in nash_eps_bounds_all_ep]

In [None]:
import copy

def est_revenue_from_refprice(crp, b0=None, b1=None, b2=None):
    """Estimate revenue for a ``(reference price, price)`` pair.

    Demand is ``b0 + b1 * price + b2 * (price - ref_price)`` and the returned
    revenue is ``demand * price``.

    Parameters
    ----------
    crp : sequence
        ``crp[0]`` is the reference price and ``crp[1]`` the price.
    b0, b1, b2 : float, optional
        Demand coefficients. Any coefficient left as ``None`` falls back to the
        notebook-level ``beta0`` / ``beta1`` / ``beta2``, which keeps existing
        single-argument calls working unchanged.

    Returns
    -------
    The estimated revenue, ``demand * price``.
    """
    # Resolve coefficients lazily so the globals are only needed when used.
    b0 = beta0 if b0 is None else b0
    b1 = beta1 if b1 is None else b1
    b2 = beta2 if b2 is None else b2

    ref_price, price = crp[0], crp[1]
    demand = b0 + b1 * price + b2 * (price - ref_price)
    return demand * price

# Independent copies of the (ref price, action) pairs whose second column is
# overwritten with the lower / upper bound respectively.
nash_lower_bound_ref_pair = copy.deepcopy(computed_ref_price_pair)
nash_lower_bound_ref_pair[:, 1] = nash_lower_bound_all_ep

nash_upper_bound_ref_pair = copy.deepcopy(computed_ref_price_pair)
nash_upper_bound_ref_pair[:, 1] = nash_upper_bound_all_ep

# Revenue at each bound, divided by 3
# (presumably the number of agents -- TODO confirm).
nash_lower_bound_all_rev_ep = [
    est_revenue_from_refprice(pair) / 3 for pair in nash_lower_bound_ref_pair
]
nash_upper_bound_all_rev_ep = [
    est_revenue_from_refprice(pair) / 3 for pair in nash_upper_bound_ref_pair
]

In [None]:
# Shade the region between the lower- and upper-bound revenue estimates.
sns.set()
x = np.arange(len(nash_lower_bound_all_ep))

plt.fill_between(
    x,
    nash_lower_bound_all_rev_ep,
    nash_upper_bound_all_rev_ep,
    alpha=0.2,
    color='b',
)

In [None]:




# Average and agent-0 reward series drawn over the shaded bound band.
sns.set()
x = np.arange(len(nash_lower_bound_all_ep))

plt.plot(x, res.avg_epoch_rewards, '--', label='Average Episode Rewards')
plt.plot(x, res.avg_epoch_rewards_agent, '--', label='Reward Agent 0')
# plt.plot(x, ref_prices, 'r--', label='Ref. Price')
# plt.plot(x, avg_ag_rewards, 'r--', label='Ref. Price')
# plt.plot(x, agent_interest_action, 'o--', label='Agent 0 Price')

# NOTE(review): the band uses `nash_*_bound_all_ep` while the lines are rewards;
# the revenue bounds (`nash_*_bound_all_rev_ep`) may be what is intended here --
# confirm before interpreting the figure.
plt.fill_between(x, nash_lower_bound_all_ep, nash_upper_bound_all_ep, color='b', alpha=0.2)

# The lines carry labels, so render the legend (it was never drawn before) and
# finish the figure the same way as the other plotting cells.
plt.legend()
plt.show()


In [None]:
# When the solid blue line is inside the fill, there is no incentive to deviate; otherwise there is an incentive, and the agents should try to undercut.

In [None]:
# "Ref Prices": ref_prices, 
# Bounds, average rewards and average actions of the selected run, side by side.
nash_bound_per_run = {
    "Nash Lower Bound": nash_lower_bound,
    "Nash Upper Bound": nash_upper_bound,
    "Avg. Agent Rewards": avg_ag_rewards,
    "Avg. Agent Actions": avg_ag_actions,
}
nash_bound_per_run_df = pd.DataFrame(nash_bound_per_run)

# Widen the default seaborn figure, then draw one line per column.
sns.set(rc={'figure.figsize': (9, 5)})
sns.lineplot(data=nash_bound_per_run_df)

In [None]:
# Display the per-run table that was plotted above.
nash_bound_per_run_df

In [None]:
# Display the first entry of the stored episode rewards.
res.episode_rewards[0]

In [None]:
# State whose stored policy is inspected; the dictionary is keyed by the
# `repr` string of the state list.
state_key = [0.0, 0.0, 0.0, 3.0]

policy_for_state = res.sna_policy_dict_iter[repr(state_key)]
# Entry 0 of that policy (agent 0 in the cells that follow).
policy_for_state[0]

In [None]:
# Print agent 0's policy values for `state_key`, then the index of the largest.
agent_id = 0
agent_policy = res.sna_policy_dict_iter[repr(state_key)][agent_id]

max_p = 0
# Reset explicitly: without this, `max_x` is undefined on a fresh kernel (or
# stale from another cell) whenever no value exceeds 0.
max_x = None
for x in range(10):
    p = agent_policy[x]
    if p > max_p:
        max_p = p
        max_x = x
    print(p)
print(max_x)

In [None]:
# Print agent 1's policy values for `state_key`, then the index of the largest.
agent_id = 1
agent_policy = res.sna_policy_dict_iter[repr(state_key)][agent_id]

max_p = 0
# Reset explicitly: without this, `max_x` is undefined on a fresh kernel (or
# stale from another cell) whenever no value exceeds 0.
max_x = None
for x in range(10):
    p = agent_policy[x]
    if p > max_p:
        max_p = p
        max_x = x
    print(p)
print(max_x)
    

In [None]:
# Print agent 2's policy values for `state_key`, then the index of the largest.
agent_id = 2
agent_policy = res.sna_policy_dict_iter[repr(state_key)][agent_id]

max_p = 0
# Reset explicitly: without this, `max_x` is undefined on a fresh kernel (or
# stale from another cell) whenever no value exceeds 0.
max_x = None
for x in range(10):
    p = agent_policy[x]
    if p > max_p:
        max_p = p
        max_x = x
    print(p)
print(max_x)
    