In [1]:
using Printf
using CSV
using DataFrames
using SpecialFunctions
using LinearAlgebra
#3537.732014626929	25.323011946046602	133.0995949999986	3379.309407680884


In [2]:
#Small.csv

"""
    QLearning(𝒮, 𝒜, γ, Q, α)

State for tabular Q-learning.

The field types are captured as type parameters at construction time so that
code reading `model.Q`, `model.γ` and `model.α` is specialised on their concrete
types instead of going through `Any`. A field may still be reassigned, but the
new value is converted to the type the model was constructed with.

# Fields
- `𝒮`: state space (assumes `1:nstates`)
- `𝒜`: action space (assumes `1:nactions`)
- `γ`: discount
- `Q`: action value function, indexed as `Q[s, a]`
- `α`: learning rate
"""
mutable struct QLearning{TS,TA,Tγ,TQ,Tα}
    𝒮::TS # state space (assumes 1:nstates)
    𝒜::TA # action space (assumes 1:nactions)
    γ::Tγ # discount
    Q::TQ # action value function
    α::Tα # learning rate
end

"""
    update!(model::QLearning, s, a, r, s′)

Apply one Q-learning update for the transition `(s, a, r, s′)`:

    Q[s, a] += α * (r + γ * max(Q[s′, :]) - Q[s, a])

`model.Q` is modified in place. Returns `model`.
"""
function update!(model::QLearning, s, a, r, s′)
    γ, Q, α = model.γ, model.Q, model.α
    # A view avoids copying the successor state's row on every update.
    best_next = maximum(view(Q, s′, :))
    Q[s, a] += α * (r + γ * best_next - Q[s, a])
    return model
end

"""
    simulate2(df, model, h; tol=0.001)

Run up to `h` passes over the rows of `df`, calling `update!` on `model` for each
row. Each row is destructured as `(s, a, r, sp)`.

After every pass the sum of `model.Q` is compared with the sum after the previous
pass (`0` before the first one). When the absolute change is below `tol`, the pass
number is printed and the loop stops early.

# Arguments
- `df`: table whose rows iterate as `(s, a, r, sp)`.
- `model`: model accepted by `update!`; its `Q` is modified in place.
- `h`: maximum number of passes.

# Keywords
- `tol`: threshold on the absolute change of `sum(model.Q)` between passes.

Returns `nothing`.
"""
function simulate2(df, model, h; tol=0.001)
    prior = zero(eltype(model.Q))
    for i in 1:h
        for (s, a, r, sp) in eachrow(df)
            update!(model, s, a, r, sp)
        end
        total = sum(model.Q)
        # Compare magnitudes: a pass that lowers the total (e.g. negative
        # rewards) is a change too and must not count as convergence.
        if abs(total - prior) < tol
            print(i)
            break
        end
        prior = total
    end
    return nothing
end

simulate2 (generic function with 1 method)

In [3]:
# Small problem: choose the input CSV and the policy file to write, then load
# the CSV into a DataFrame.
inputfile = "small.csv"
filename = "small.policy"
df = CSV.File(inputfile) |> DataFrame;

In [6]:
# Build a zero-initialised Q-table of size lengths × lengtha and train on `df`.
lengths, lengtha = 100, 4 # Q-table dimensions: rows (states) × columns (actions)
γ = 0.95 # discount
α = 0.2 # learning rate
k = 1 # passed to simulate2 as the maximum number of passes over df
𝒮 = Vector(1:lengths)
𝒜 = Vector(1:lengtha)
Q = zeros(Float64, lengths, lengtha)
model = QLearning(𝒮, 𝒜, γ, Q, α)
simulate2(df, model, k)

# Row-wise maximum of the Q-table: `v` holds the maxima, `π` the CartesianIndex
# of each maximum. NOTE(review): assigning to `π` shadows the built-in constant
# of the same name in this session.
v, π = findmax(model.Q; dims=2)

# Write the column (action) of each row's maximum, one per line.
open(filename, "w+") do io
    for best in π
        println(io, best[2])
    end
end

LoadError: BoundsError: attempt to access 100×4 Matrix{Float64} at index [25203, 1]

In [None]:
# Medium problem: choose the input CSV and the policy file to write, then load
# the CSV into a DataFrame.
inputfile = "medium.csv"
filename = "medium.policy"
df = CSV.File(inputfile) |> DataFrame;

In [None]:
# Build a zero-initialised Q-table of size lengths × lengtha and train on `df`.
γ = 1 # discount (undiscounted)
lengths = 50000
lengtha = 7
𝒮 = collect(1:lengths)
𝒜 = collect(1:lengtha)
Q = zeros(lengths, lengtha)
α = 0.2 # learning rate
model = QLearning(𝒮, 𝒜, γ, Q, α)
# The model stores the α value it was constructed with, so nothing is reassigned
# after this point; a later `α = ...` would not change the learning rate used.
k = 150 # maximum number of passes over df
simulate2(df, model, k)

In [342]:
# Fill every Q row that is still entirely zero (e.g. a state that was never
# updated) with a copy of the closest preceding non-zero row. Zero rows that come
# before the first non-zero row all receive the same random row.
temp = rand(Float64, lengtha)
for s in 1:lengths
    row = view(model.Q, s, :)
    # Test each entry rather than the row sum, so that a learned row whose
    # values happen to cancel is not overwritten.
    if all(iszero, row)
        row .= temp
    else
        temp = copy(row)
    end
end



In [343]:
# Row-wise maximum of the Q-table: `v` holds the maxima, `π` the CartesianIndex
# of each maximum. NOTE(review): assigning to `π` shadows the built-in constant
# of the same name in this session.
v, π = findmax(model.Q; dims=2)
# Write the column (action) of each row's maximum, one per line.
open(filename, "w+") do io
    for best in π
        println(io, best[2])
    end
end

In [377]:
# Large problem: choose the input CSV and the policy file to write, then load
# the CSV into a DataFrame.
inputfile = "large.csv"
filename = "large.policy"
df = CSV.File(inputfile) |> DataFrame;

In [365]:
# Build a zero-initialised Q-table of size lengths × lengtha and train on `df`.
lengths, lengtha = 312020, 9 # Q-table dimensions: rows (states) × columns (actions)
γ = 0.95 # discount
α = 0.2 # learning rate
k = 1 # passed to simulate2 as the maximum number of passes over df
𝒮 = Vector(1:lengths)
𝒜 = Vector(1:lengtha)
Q = zeros(Float64, lengths, lengtha)
model = QLearning(𝒮, 𝒜, γ, Q, α)
simulate2(df, model, k)

# Fill every Q row that is still entirely zero (e.g. a state that was never
# updated) with a copy of the closest preceding non-zero row. Zero rows that come
# before the first non-zero row all receive the same random row.
temp = rand(Float64, lengtha)
for s in 1:lengths
    row = view(model.Q, s, :)
    # Test each entry rather than the row sum, so that a learned row whose
    # values happen to cancel is not overwritten.
    if all(iszero, row)
        row .= temp
    else
        temp = copy(row)
    end
end

# Distinct values of the data's `s` column, and the Q-table row indices that
# never appear there.
seen = unique(df.s);
unseen = setdiff(1:lengths, seen);

# Row-wise maximum of the Q-table: `v` holds the maxima, `π` the CartesianIndex
# of each maximum. NOTE(review): assigning to `π` shadows the built-in constant
# of the same name in this session.
v, π = findmax(model.Q; dims=2)


([0.8213646030974739; 0.8213646030974739; … ; 23.808131328465983; 23.808131328465983], CartesianIndex{2}[CartesianIndex(1, 4); CartesianIndex(2, 4); … ; CartesianIndex(312019, 5); CartesianIndex(312020, 5)])

In [348]:
# Write the column (action) component of every index in `π`, one per line.
open(filename, "w+") do io
    for best in π
        println(io, best[2])
    end
end