# Exploration and Exploitation

In [None]:
using PGFPlots
using Interact
using Reactive
using Distributions
include("helpers.jl")
include("bandits.jl");

## Multi-Armed Bandit Problems

In [2]:
# Demo cell for the multi-armed bandit section.
# Seeds the global RNG first so that anything random done by the calls below is
# reproducible from run to run.
using Random
Random.seed!(2)
# Number of arms passed to the Bandit constructor.
arms = 3
# Bandit is not defined in this notebook cell (presumably it comes from bandits.jl);
# presumably Bandit(n) builds an n-armed bandit — TODO confirm.
b = Bandit(arms)
# Last expression of the cell, so its return value is what the notebook displays.
# NOTE(review): presumably an interactive pull-the-arm widget — confirm in the helper files.
banditTrial(b)

## Bayesian Model Estimation

In [3]:
# Demo cell for the Bayesian model estimation section.
# Re-seed the global RNG so this cell is reproducible independently of earlier cells.
Random.seed!(4)
# Rebinds the global `arms` (and `b` below) that the previous demo cell also set.
arms = 2
# Bandit is defined outside this cell (presumably in bandits.jl).
b = Bandit(arms)
# Last expression of the cell, so its return value is what the notebook displays.
# NOTE(review): presumably plots per-arm Beta posteriors interactively — confirm in the helper files.
banditEstimation(b)

! Undefined control sequence.
\sa@placebox ->\newpage \global \pdfpagewidth 
                                              =\wd \sa@box \global \pdfpageh...
l.127 \end{tikzpicture}
                     


Observable{Any} with 0 listeners. Value:
Axis(PGFPlots.Plots.Plot[PGFPlots.Plots.Linear(Real[0.0 0.010101010101010102 … 0.98989898989899 1.0; 1.0 1.0 … 1.0 1.0], "none", nothing, nothing, "Beta(1, 1)", nothing, nothing, nothing, false)], nothing, nothing, nothing, nothing, nothing, nothing, nothing, 0, 1, 0, nothing, nothing, nothing, nothing, nothing, nothing, "15cm", "10cm", nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, "axis")

! Undefined control sequence.
\sa@placebox ->\newpage \global \pdfpagewidth 
                                              =\wd \sa@box \global \pdfpageh...
l.127 \end{tikzpicture}
                     
! Undefined control sequence.
\sa@placebox ->\newpage \global \pdfpagewidth 
                                              =\wd \sa@box \global \pdfpageh...
l.127 \end{tikzpicture}
                     


Observable{Any} with 0 listeners. Value:
Axis(PGFPlots.Plots.Plot[PGFPlots.Plots.Linear(Real[0.0 0.010101010101010102 … 0.98989898989899 1.0; 1.0 1.0 … 1.0 1.0], "none", nothing, nothing, "Beta(1, 1)", nothing, nothing, nothing, false)], nothing, nothing, nothing, nothing, nothing, nothing, nothing, 0, 1, 0, nothing, nothing, nothing, nothing, nothing, nothing, "15cm", "10cm", nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, "axis")

! Undefined control sequence.
\sa@placebox ->\newpage \global \pdfpagewidth 
                                              =\wd \sa@box \global \pdfpageh...
l.127 \end{tikzpicture}
                     


## Ad Hoc Exploration Strategies

In [4]:
# Select random action with probability eps, otherwise greedy.
#
# `eps` is the exploration probability. The struct is parametric in the type of
# `eps` so the field is stored with a concrete type rather than as an abstract
# `Real`; `EpsGreedy(0.1)` still constructs it exactly as before.
struct EpsGreedy{T<:Real} <: BanditPolicy
    eps::T
end

# Return the index of the arm to pull next under the eps-greedy policy `b`,
# given the statistics `s` collected so far.
function arm(b::EpsGreedy, s::BanditStatistics)
    if rand() < b.eps
        # Explore: every arm index 1..numArms(s) is equally likely.
        return rand(DiscreteUniform(1, numArms(s)))
    end
    # Exploit: arm with the largest win probability reported for `s`.
    return argmax(winProbabilities(s))
end;

In [5]:
# Select arm with probability proportional to exp(precision*winprobability).
#
# `precision` scales the win probabilities before exponentiation; 0 makes every
# arm equally likely, larger values concentrate the choice on the best arm.
# The struct is parametric in the type of `precision` so the field is stored
# with a concrete type rather than as an abstract `Real`; `SoftMax(2)` still
# constructs it exactly as before.
struct SoftMax{T<:Real} <: BanditPolicy
    precision::T
end

# Sample the index of the arm to pull next under the softmax policy `b`,
# given the statistics `s` collected so far.
function arm(b::SoftMax, s::BanditStatistics)
    scores = b.precision * winProbabilities(s)
    # Shifting by the maximum leaves the normalized probabilities unchanged but
    # keeps exp from overflowing to Inf (and the weights from becoming NaN)
    # when `precision` is large.
    weights = exp.(scores .- maximum(scores))
    return rand(Categorical(weights / sum(weights)))
end;

In [6]:
# Select arm with highest alpha upper confidence bound.
#
# `alpha` is the quantile level at which each arm's Beta distribution is
# evaluated. The struct is parametric in the type of `alpha` so the field is
# stored with a concrete type rather than as an abstract `Real`;
# `IntervalExploration(0.95)` still constructs it exactly as before.
struct IntervalExploration{T<:Real} <: BanditPolicy
    alpha::T
end

# Return the index of the arm whose `b.alpha` quantile is largest, given the
# per-arm win and try counts stored in `s`.
function arm(b::IntervalExploration, s::BanditStatistics)
    # Quantile of Beta(wins + 1, losses + 1) for arm `k`, where
    # losses = tries - wins.
    function upperbound(k)
        wins = s.numWins[k]
        losses = s.numTries[k] - wins
        return quantile(Beta(wins + 1, losses + 1), b.alpha)
    end
    return argmax(map(upperbound, eachindex(s.numWins)))
end;

In [7]:
# Interactive comparison of the three exploration policies on one fixed bandit.
# `steps` is the number of pulls per simulated run (and the x-axis extent);
# `iterations` is forwarded to simulateAverage.
steps = 50
iterations = 1000
bandit = Bandit(collect(0.1:0.2:1))
# bandit = Bandit(collect(1:-0.2:0.1))
# NOTE(review): the three values below appear to be shadowed by the @manipulate
# bindings of the same names and are not read by the plot; they are kept so the
# cell defines the same globals as before. `interval = 0.05` lies outside the
# slider range 0.5:0.05:1.
epsgreedy = 0.1
softmax = 2
interval = 0.05
@manipulate for epsgreedy in 0:0.1:1, softmax in 0:2:40, interval in 0.5:0.05:1
    # Simulate `policy` on the shared bandit and divide the k-th entry of the
    # result by k. Presumably simulateAverage returns cumulative successes per
    # pull, making this the running success rate — TODO confirm.
    function averagesuccess(policy)
        totals = simulateAverage(bandit, policy, steps=steps, iterations=iterations)
        return totals ./ (1:steps)
    end

    # Evaluate in a fixed order (eps greedy, softmax, interval) so the sequence
    # of random draws matches the previous version of this cell.
    epsGreedyResults = averagesuccess(EpsGreedy(epsgreedy))
    softMaxResults = averagesuccess(SoftMax(softmax))
    intervalResults = averagesuccess(IntervalExploration(interval))

    Axis([
        Plots.Linear(epsGreedyResults, legendentry="eps greedy", style="very thick", mark="none"),
        Plots.Linear(softMaxResults, legendentry="softmax", style="very thick", mark="none"),
        Plots.Linear(intervalResults, legendentry="interval", style="very thick", mark="none")
        ], style="legend pos=south east", ymin=0, ymax=1, xmin=0, xmax=steps, xlabel="Pulls", ylabel="Average success")
end

! Undefined control sequence.
\sa@placebox ->\newpage \global \pdfpagewidth 
                                              =\wd \sa@box \global \pdfpageh...
l.194 \end{tikzpicture}
                     


Node{WebIO.DOM}(WebIO.DOM(:html, :div), Any[Node{WebIO.DOM}(WebIO.DOM(:html, :div), Any[Scope(Node{WebIO.DOM}(WebIO.DOM(:html, :div), Any[Node{WebIO.DOM}(WebIO.DOM(:html, :div), Any[Node{WebIO.DOM}(WebIO.DOM(:html, :label), Any["epsgreedy"], Dict{Symbol,Any}(:className => "interact ",:style => Dict{Any,Any}(:padding => "5px 10px 0px 10px")))], Dict{Symbol,Any}(:className => "interact-flex-row-left")), Node{WebIO.DOM}(WebIO.DOM(:html, :div), Any[Node{WebIO.DOM}(WebIO.DOM(:html, :input), Any[], Dict{Symbol,Any}(:max => 11,:min => 1,:attributes => Dict{Any,Any}(:type => "range",Symbol("data-bind") => "numericValue: index, valueUpdate: 'input', event: {change: function (){this.changes(this.changes()+1)}}","orient" => "horizontal"),:step => 1,:className => "slider slider is-fullwidth",:style => Dict{Any,Any}()))], Dict{Symbol,Any}(:className => "interact-flex-row-center")), Node{WebIO.DOM}(WebIO.DOM(:html, :div), Any[Node{WebIO.DOM}(WebIO.DOM(:html, :p), Any[], Dict{Symbol,Any}(:attributes 

! Undefined control sequence.
\sa@placebox ->\newpage \global \pdfpagewidth 
                                              =\wd \sa@box \global \pdfpageh...
l.194 \end{tikzpicture}
                     
