# Computational Modeling of Behavioral Data by Prof. Kentaro Katahira

## Rescorla-Wagner model

In [1]:
#How can I add unicode caption to label?

using Plots
using Interact


"""
N‚Çú: number of trials
Œ±: learning rate
P·µ£: probability of getting reward
"""

@manipulate for N‚Çú = 0:1:500, Œ± = 0:0.05:1, P·µ£ = 0:0.05:1

    ùêï = zeros(N‚Çú) #strengths of association as N‚Çú-length vector
    ùêë = rand(N‚Çú) .< P·µ£ # presence of reinforcement (1 or 0) as N‚Çú-length vector

    for t in 1: N‚Çú-1
        
        ùêï[t+1] = ùêï[t] + Œ± *(ùêë[t]-ùêï[t])
    end 
    
    plot(ùêï, label= string("a ", Œ±))
    plot!([(i, P·µ£) for i in 1:1:N‚Çú], label="expected value of r: " * string(P·µ£))
    xlabel!("number of trials")
    ylabel!("strength of association")
    ylims!((0, 1))
    title!("Rescorla-Wagner model")
end

## Q-learning simulation

### softmax function

In [3]:
#How can I add unicode caption to label?

function softmax(Œ≤, Œîq)
    return 1 / (1+ exp(-Œ≤ * (Œîq)))
end

@manipulate for Œ≤ in 0:0.05:5
    plot([(Œîq, softmax(Œ≤, Œîq)) for Œîq in -4:0.1:4], m=:o, label=string("beta ", Œ≤))
    xlabel!("difference in Q")
    ylabel!("probability")
    ylims!((0, 1))
    title!("Softmax Function")
end

In [4]:
# how can I input P with small subscript A?

"""
N‚Çú: number of trials
Œ±: learning rate
Œ≤: inverse temperature
P·µ£: probability of getting reward in A
"""

@manipulate for N‚Çú in 0:5:200, Œ± in 0:0.05:1, Œ≤ in 0:0.25:5, P·µ£ in 0:0.05:1
    
    ùêê = zeros((2, N‚Çú)) #initial value of Q in 2 by N‚Çú matrix
    ùêú = zeros(Int, N‚Çú) #initial choice in each N‚Çú trial
    ùê´ = zeros(N‚Çú) # 0 (no reward) or 1 (reward) in each N‚Çú trial
    P‚Çê = zeros(N‚Çú) # probability of choosing A in each trial
    P = (P·µ£, 1-P·µ£)
    
    for t in 1:N‚Çú-1
        P‚Çê = softmax(Œ≤, ùêê[1, t] - ùêê[2, t])
        
        if rand() < P‚Çê
            ùêú[t] = 1 #choose A
            ùê´[t] = Int(rand(Float64) < P[1])
        else
            ùêú[t] = 2 #choose B
            ùê´[t] = Int(rand(Float64) < P[2])
        end
        
        ùêê[ùêú[t], t+1] = ùêê[ùêú[t], t] + Œ± * (ùê´[t] - ùêê[ùêú[t], t])
        ùêê[3 - ùêú[t], t+1] = ùêê[3 - ùêú[t], t] # retain value of unpicked choice
    end
    
    plot(ùêê[1, :], label="Qt(A)", color="orange")
    plot!([(i, P[1]) for i in 1:1:N‚Çú], label="expected value of reward for A:" * string(P[1]), color="darkorange")
    plot!(ùêê[2, :], label="Qt(B)", color="skyblue")
    plot!([(i, P[2]) for i in 1:1:N‚Çú], label="expected value of reward for B:" * string(P[2]), color="darkblue")
    xlabel!("number of trials")
    ylabel!("Q (value of behavior?)")
    ylims!((0, 1))
    title!("Q-learning model")
end

## Parameter Estimation

In [5]:
function func_qlearning(Œ±, Œ≤, ùêú, ùê´)
    
    N‚Çú = length(ùêú)
    P‚Çê = zeros(N‚Çú) #probabilities of selecting A
    ùêê = zeros((2, N‚Çú))
    logl = 0 #initial value of log likelihood
    
    for t in 1:N‚Çú - 1
        P‚Çê = softmax(Œ≤, ùêê[1, t] - ùêê[2, t])
        logl += (ùêú[t] == 1) * log(P‚Çê) + (ùêú[t] == 2) * log(1 - log(P‚Çê))
        ùêê[ùêú[t], t + 1] = ùêê[ùêú[t], t] + Œ± * (ùê´[t] - ùêê[ùêú[t], t])
        ùêê[3 - ùêú[t], t + 1] =  ùêê[3 - ùêú[t], t]
    end
    return (negll = -logl, ùêê = ùêê, P‚Çê = P‚Çê);
end

func_qlearning (generic function with 1 method)

In [6]:
func_qlearning(0.3, 0.3, [1, 2, 1], [1, 0, 1])

(negll = 0.19288151398198217, ùêê = [0.0 0.3 0.3; 0.0 0.0 0.0], P‚Çê = 0.5224848247918001)

In [7]:

"""

plt =  plot3d(
        1,
        xlim = (0, 1),
        ylim = (-0, 10),
        zlim = (0, 1),
        )


for Œ± in 0:0.05:1, Œ≤ in 0:0.25:10
    #push!(plt, Œ±, Œ≤, func_qlearning(Œ±, Œ≤, [1, 2, 1], [1, 0, 1]).negll)
    
end
"""

"\nplt =  plot3d(\n        1,\n        xlim = (0, 1),\n        ylim = (-0, 10),\n        zlim = (0, 1),\n        )\n\n\nfor Œ± in 0:0.05:1, Œ≤ in 0:0.25:10\n    #push!(plt, Œ±, Œ≤, func_qlearning(Œ±, Œ≤, [1, 2, 1], [1, 0, 1]).negll)\n    \nend\n"