#' @title Simulates an Experience-Weighted Attraction model
#' @description \code{sim_EWA()} simulates plays of a normal-form
#' game as predicted by an experience-weighted attraction (EWA) model.
#' @details Simulates plays of a normal-form game defined by
#' \code{normal_form()} in the way predicted by an EWA learning model.
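#' At each period, each player chooses a strategy with softmax probabilities
#' proportional to \code{exp(lambda * A)}, where \code{A} is the vector of
#' attraction values. Attractions are then updated as
#' \code{A[t, ] = (phi * N[t - 1] * A[t - 1, ] + delta * payoff) / N[t]}
#' for unchosen strategies, with \code{delta} replaced by 1 for the chosen
#' strategy, and \code{N[t] = rho * N[t - 1] + 1}.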
#' @param game An object of \code{normal_form} class defined by
#' \code{normal_form()}.
#' @param n_periods A positive integer specifying how many times the game is
#' played within each sample.
#' @param lambda A positive real value representing the players' sensitivity to
#' the attraction values of strategies. As \code{lambda} gets larger, choices
#' depend more heavily on the attraction values. As \code{lambda} approaches
#' 0, strategies tend to be chosen at random.
#' @param delta A real number between 0 and 1. This parameter controls how
#' strongly the forgone payoffs of unchosen strategies update their
#' attraction values. If \code{delta = 0}, attraction is updated only for
#' the strategy selected at the given period (i.e., reinforcement learning).
#' If \code{delta = 1}, attraction is updated equally for all strategies
#' (i.e., belief-based learning).
#' @param rho A real number between 0 and 1. This parameter controls the
#' learning speed via the experience weight N. Set \code{rho = 0} for both
#' "reinforcement" learning and "belief-based" learning.
#' @param phi A real number between 0 and 1. This parameter controls how much
#' the attraction values at the current period are constrained by the past
#' attraction values. If \code{phi = 0}, the past attraction values are
#' ignored. Set \code{phi = 1} for both "reinforcement" learning and
#' "belief-based" learning.
#' @param A1_init An initial value of Player 1's attraction for each strategy.
#' @param A2_init An initial value of Player 2's attraction for each strategy.
#' @param N_init An initial value of the experience weight N.
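#' @return A list with three elements: \code{data}, a data frame of the
#' strategies chosen by each player in each period; \code{attraction}, a list
#' of data frames of each player's attraction values (\code{A1} and
#' \code{A2}); and \code{choice_prob}, a list of data frames of each player's
#' choice probabilities (\code{P1} and \code{P2}).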
#' @author Yoshio Kamijo and Yuki Yanai <yanai.yuki@@kochi-tech.ac.jp>
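#' @examples
#' # A usage sketch (assumes \code{normal_form()} accepts strategy labels and
#' # payoff vectors as shown; adjust to the actual signature):
#' \dontrun{
#' pd <- normal_form(players = c("P1", "P2"),
#'                   s1 = c("Cooperate", "Defect"),
#'                   s2 = c("Cooperate", "Defect"),
#'                   payoffs1 = c(-1, 0, -3, -2),
#'                   payoffs2 = c(-1, -3, 0, -2))
#' ewa <- sim_EWA(pd, n_periods = 50, lambda = 1, delta = 0.5,
#'                rho = 0.5, phi = 0.5)
#' head(ewa$data)
#' }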
sim_EWA <- function(game,
n_periods,
lambda = 1,
delta = 0.5,
rho = 0.5,
phi = 0.5,
A1_init = 0,
A2_init = 0,
N_init = 0) {
pi1 <- game$mat$matrix1
pi2 <- game$mat$matrix2
n1 <- length(game$strategy$s1)
n2 <- length(game$strategy$s2)
## A (attraction) and P (Probability)
A1 <- P1 <- matrix(NA, nrow = n_periods + 1, ncol = n1) # Player 1
A2 <- P2 <- matrix(NA, nrow = n_periods + 1, ncol = n2) # Player 2
A1[1, ] <- A1_init
A2[1, ] <- A2_init
  N <- rep(NA, n_periods + 1)
N[1] <- N_init
choice1 <- choice2 <- rep(NA, n_periods + 1)
  for (t in 2:(n_periods + 1)) {
w1 <- A1[t - 1, ]
w2 <- A2[t - 1, ]
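    ## update the experience weight: depreciate the past by rho, add 1 for
    ## the current period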
N[t] <- rho * N[t - 1] + 1
lambda1 <- lambda * w1
lambda2 <- lambda * w2
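    ## softmax choice probabilities over lambda * attraction, computed in
    ## log space (log-sum-exp trick) to avoid numerical overflow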
max1 <- max(lambda1)
max2 <- max(lambda2)
dif1 <- lambda1 - max1
dif2 <- lambda2 - max2
ln_P1 <- lambda1 - (max1 + log(sum(exp(dif1))))
ln_P2 <- lambda2 - (max2 + log(sum(exp(dif2))))
P1[t, ] <- exp(ln_P1)
P2[t, ] <- exp(ln_P2)
choice1[t] <- sample(1:n1, size = 1, prob = P1[t, ])
choice2[t] <- sample(1:n2, size = 1, prob = P2[t, ])
## vector indicating which strategy was selected
e1 <- numeric(n1)
e1[choice1[t]] <- 1
e2 <- numeric(n2)
e2[choice2[t]] <- 1
    ## Player 1's payoff for each strategy, given Player 2's action
    payoff1_vec <- as.vector(pi1 %*% e2)
    ## Player 2's payoff for each strategy, given Player 1's action
    payoff2_vec <- as.vector(e1 %*% pi2)
    ## update attractions for all strategies, weighting payoffs by delta
A1[t, ] <- (phi * N[t - 1] * A1[t - 1, ] + delta * payoff1_vec) / N[t]
A2[t, ] <- (phi * N[t - 1] * A2[t - 1, ] + delta * payoff2_vec) / N[t]
    ## then overwrite the chosen strategy's attraction with its realized
    ## payoff (weight 1)
payoff1 <- as.numeric(e1 %*% pi1 %*% e2)
payoff2 <- as.numeric(e1 %*% pi2 %*% e2)
A1[t, choice1[t]] <- (phi * N[t - 1] * A1[t - 1, choice1[t]] + payoff1) / N[t]
A2[t, choice2[t]] <- (phi * N[t - 1] * A2[t - 1, choice2[t]] + payoff2) / N[t]
}
df <- data.frame(player1 = game$strategy$s1[choice1[-1]],
player2 = game$strategy$s2[choice2[-1]],
period = 1:n_periods)
colnames(A1) <- colnames(P1) <- game$strategy$s1
colnames(A2) <- colnames(P2) <- game$strategy$s2
A1 <- as.data.frame(A1[-1, ])
A2 <- as.data.frame(A2[-1, ])
P1 <- as.data.frame(P1[-1, ])
P2 <- as.data.frame(P2[-1, ])
A1$period <- A2$period <- P1$period <- P2$period <- 1:n_periods
return(list(data = df,
attraction = list(A1 = A1,
A2 = A2),
choice_prob = list(P1 = P1,
P2 = P2)))
}
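
## Note: sim_EWA() draws strategies with sample(), so repeated runs differ.
## Call set.seed() before sim_EWA() for a reproducible simulation.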