Merge branch 'master' into rf-logging

tspooner committed Oct 22, 2017
2 parents e0b7937 + 615891e commit 5c8a636

Showing 25 changed files with 603 additions and 957 deletions.
4 changes: 2 additions & 2 deletions Cargo.toml

@@ -11,11 +11,11 @@ gcc = "0.3"

 [dependencies]
 log = "0.3"
-blas = "0.15"
+blas = "0.18"
 libc = "0.2"
 rand = "0.3"
 slog = "2.0"
 time = "0.1"
-ndarray = "0.9"
+ndarray = "0.10"
 slog-term = "2.2"
 slog-async = "2.1"
2 changes: 1 addition & 1 deletion build.rs

@@ -2,7 +2,7 @@ extern crate gcc;


 fn main() {
-    gcc::Config::new()
+    gcc::Build::new()
         .file("extern/tiles.c")
         .include("extern")
         .compile("libtiles.a");
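This tracks an upstream rename: during the `gcc` crate's 0.3.x series the builder type `Config` was deprecated in favour of `Build`, with the builder methods themselves left unchanged, so the fix is a drop-in rename. (The crate was later republished under the name `cc`.)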
9 changes: 7 additions & 2 deletions examples/sandbox.rs

@@ -1,7 +1,10 @@
 extern crate rsrl;

 use rsrl::{run, Parameter, SerialExperiment, Evaluation};
-use rsrl::fa::linear::RBFNetwork;
+
+use rsrl::fa::Linear;
+use rsrl::fa::projection::RBFNetwork;
+
 use rsrl::agents::control::td::QSigma;
 use rsrl::domains::{Domain, MountainCar};
 use rsrl::policies::{Greedy, EpsilonGreedy};

@@ -28,7 +31,9 @@ fn main() {
     let aspace = domain.action_space();
     let n_actions: usize = aspace.span().into();

-    let q_func = RBFNetwork::new(domain.state_space().partitioned(8), n_actions);
+    let pr = RBFNetwork::from_space(domain.state_space().partitioned(8));
+    let q_func = Linear::new(pr, n_actions);
+
     let policy = EpsilonGreedy::new(aspace, Parameter::exponential(0.9, 0.01, 0.99));

     QSigma::new(q_func, policy, 0.05, 0.99, 0.2, 2)
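This change is the new function-approximation layout in a nutshell: rather than `RBFNetwork` acting as a Q-function itself, a basis (`Projection`) is constructed first and then wrapped in a generic `Linear` model. Below is a minimal self-contained sketch of that pattern with toy types standing in for rsrl's; only the names `Projection`, `Linear`, `project`, and `evaluate` are taken from the diff, everything else is invented for illustration.

```rust
// Illustrative mock of the Projection/Linear split; not the rsrl API.

trait Projection {
    // phi(s): map a raw state to a feature vector.
    fn project(&self, s: &[f64]) -> Vec<f64>;
    fn dim(&self) -> usize;
}

// Toy radial-basis projection over a 1-D state space.
struct Rbf {
    centres: Vec<f64>,
    width: f64,
}

impl Projection for Rbf {
    fn project(&self, s: &[f64]) -> Vec<f64> {
        self.centres
            .iter()
            .map(|c| (-((s[0] - c) / self.width).powi(2)).exp())
            .collect()
    }

    fn dim(&self) -> usize {
        self.centres.len()
    }
}

// Generic linear model over any projection: one weight vector per action.
struct Linear<P: Projection> {
    projector: P,
    weights: Vec<Vec<f64>>,
}

impl<P: Projection> Linear<P> {
    fn new(projector: P, n_actions: usize) -> Self {
        let d = projector.dim();
        Linear { projector: projector, weights: vec![vec![0.0; d]; n_actions] }
    }

    // Q(s, .) = W * phi(s)
    fn evaluate(&self, s: &[f64]) -> Vec<f64> {
        let phi = self.projector.project(s);
        self.weights
            .iter()
            .map(|w| w.iter().zip(phi.iter()).map(|(wi, pi)| wi * pi).sum())
            .collect()
    }
}

fn main() {
    let pr = Rbf { centres: vec![-1.0, 0.0, 1.0], width: 0.5 };
    let q_func = Linear::new(pr, 3); // three actions, as in MountainCar

    println!("{:?}", q_func.evaluate(&[0.3]));
}
```

The payoff is that the agents below can be generic over the projection type alone while sharing one concrete `Linear` implementation.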
39 changes: 18 additions & 21 deletions src/agents/control/gtd.rs

@@ -1,5 +1,5 @@
 use Parameter;
-use fa::{VFunction, QFunction, Linear};
+use fa::{Function, VFunction, QFunction, Projection, Linear};
 use agents::ControlAgent;
 use domains::Transition;
 use geometry::{Space, ActionSpace};

@@ -11,9 +11,9 @@ use std::marker::PhantomData;
 ///
 /// Maei, Hamid R., et al. "Toward off-policy learning control with function approximation."
 /// Proceedings of the 27th International Conference on Machine Learning (ICML-10). 2010.
-pub struct GreedyGQ<S: Space, Q: QFunction<S>, V: VFunction<S>, P: Policy> {
-    q_func: Q,
-    v_func: V,
+pub struct GreedyGQ<S: Space, M: Projection<S>, P: Policy> {
+    q_func: Linear<S, M>,
+    v_func: Linear<S, M>,

     policy: P,

@@ -24,13 +24,9 @@ pub struct GreedyGQ<S: Space, Q: QFunction<S>, V: VFunction<S>, P: Policy> {
     phantom: PhantomData<S>
 }

-impl<S: Space, Q, V, P> GreedyGQ<S, Q, V, P>
-    where Q: QFunction<S>,
-          V: VFunction<S>,
-          P: Policy
-{
-    pub fn new<T1, T2, T3>(q_func: Q, v_func: V, policy: P,
-                           alpha: T1, beta: T2, gamma: T3) -> Self
+impl<S: Space, M: Projection<S>, P: Policy> GreedyGQ<S, M, P> {
+    pub fn new<T1, T2, T3>(q_func: Linear<S, M>, v_func: Linear<S, M>,
+                           policy: P, alpha: T1, beta: T2, gamma: T3) -> Self
         where T1: Into<Parameter>,
               T2: Into<Parameter>,
               T3: Into<Parameter>

@@ -50,35 +46,36 @@ impl<S: Space, Q, V, P> GreedyGQ<S, Q, V, P>
     }
 }

-impl<S: Space, Q, V, P> ControlAgent<S, ActionSpace> for GreedyGQ<S, Q, V, P>
-    where Q: QFunction<S> + Linear<S>,
-          V: VFunction<S> + Linear<S>,
-          P: Policy
+impl<S: Space, M: Projection<S>, P: Policy> ControlAgent<S, ActionSpace> for GreedyGQ<S, M, P>
 {
     fn pi(&mut self, s: &S::Repr) -> usize {
-        self.policy.sample(self.q_func.evaluate(s).as_slice())
+        let qs: Vec<f64> = self.q_func.evaluate(s);
+
+        self.policy.sample(qs.as_slice())
     }

     fn evaluate_policy<T: Policy>(&self, p: &mut T, s: &S::Repr) -> usize {
-        p.sample(self.q_func.evaluate(s).as_slice())
+        let qs: Vec<f64> = self.q_func.evaluate(s);
+
+        p.sample(qs.as_slice())
     }

     fn handle_transition(&mut self, t: &Transition<S, ActionSpace>) {
         let a = t.action;
         let (s, ns) = (t.from.state(), t.to.state());

-        let phi_s = self.q_func.phi(s);
-        let phi_ns = self.q_func.phi(ns);
+        let phi_s = self.q_func.project(s);
+        let phi_ns = self.q_func.project(ns);

         let td_error = t.reward +
             self.q_func.evaluate_action_phi(&(self.gamma.value()*&phi_ns - &phi_s), a);
-        let td_estimate = self.v_func.evaluate(s);
+        let td_estimate: f64 = self.v_func.evaluate(s);

         let update_q = td_error*&phi_s - self.gamma*td_estimate*phi_ns;
         let update_v = (td_error - td_estimate)*phi_s;

         self.q_func.update_action_phi(&update_q, a, self.alpha.value());
-        self.v_func.update_phi(&update_v, self.alpha*self.beta);
+        VFunction::update_phi(&mut self.v_func, &update_v, self.alpha*self.beta);
    }

     fn handle_terminal(&mut self, _: &S::Repr) {
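For reference, the arithmetic in `handle_transition` is the GreedyGQ update of Maei et al. (2010). The same equations are sketched below on plain slices, restricted to the taken action's weight vector; this is an illustration of the math in the diff, not the rsrl implementation.

```rust
// GreedyGQ weight updates as in handle_transition above. w_q is the weight
// vector for the taken action; w_v holds the secondary (correction) weights.
fn greedy_gq_step(w_q: &mut [f64], w_v: &mut [f64],
                  phi_s: &[f64], phi_ns: &[f64],
                  reward: f64, alpha: f64, beta: f64, gamma: f64) {
    fn dot(a: &[f64], b: &[f64]) -> f64 {
        a.iter().zip(b.iter()).map(|(x, y)| x * y).sum()
    }

    // td_error = r + Q evaluated on gamma*phi_ns - phi_s, i.e. r + gamma*Q(ns) - Q(s)
    let td_error = reward + gamma * dot(w_q, phi_ns) - dot(w_q, phi_s);
    // td_estimate = V(s), the gradient-correction estimate
    let td_estimate = dot(w_v, phi_s);

    for i in 0..w_q.len() {
        // update_q = td_error*phi_s - gamma*td_estimate*phi_ns, scaled by alpha
        w_q[i] += alpha * (td_error * phi_s[i] - gamma * td_estimate * phi_ns[i]);
        // update_v = (td_error - td_estimate)*phi_s, scaled by alpha*beta
        w_v[i] += alpha * beta * (td_error - td_estimate) * phi_s[i];
    }
}
```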
4 changes: 2 additions & 2 deletions src/agents/memory.rs

@@ -34,11 +34,11 @@ impl Trace {

     pub fn update(&mut self, phi: &Array1<f64>) {
         match self {
-            &mut Trace::Accumulating { ref mut eligibility, lambda } =>
+            &mut Trace::Accumulating { ref mut eligibility, .. } =>
             {
                 *eligibility += phi;
             },
-            &mut Trace::Replacing { ref mut eligibility, lambda } =>
+            &mut Trace::Replacing { ref mut eligibility, .. } =>
             {
                 eligibility.zip_mut_with(phi, |val, &p| {
                     *val = f64::min(1.0, *val + p);
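The only change here is in the patterns: `lambda` was bound but never used inside `update`, so it is replaced with `..`, which ignores all remaining fields. A tiny self-contained illustration of the same pattern follows (toy types; the real `Trace` stores `ndarray::Array1<f64>` eligibilities, as the diff shows).

```rust
// Struct-variant patterns can name just the fields they use; `..` skips
// the rest and avoids an unused-variable warning for `lambda`.
#[allow(dead_code)]
enum Trace {
    Accumulating { eligibility: Vec<f64>, lambda: f64 },
    Replacing { eligibility: Vec<f64>, lambda: f64 },
}

fn update(trace: &mut Trace, phi: &[f64]) {
    match trace {
        &mut Trace::Accumulating { ref mut eligibility, .. } => {
            for (e, p) in eligibility.iter_mut().zip(phi.iter()) {
                *e += *p;
            }
        },
        &mut Trace::Replacing { ref mut eligibility, .. } => {
            for (e, p) in eligibility.iter_mut().zip(phi.iter()) {
                *e = f64::min(1.0, *e + *p);
            }
        },
    }
}
```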
47 changes: 17 additions & 30 deletions src/agents/prediction/gtd.rs

@@ -1,26 +1,21 @@
 use Parameter;
-use fa::{VFunction, Linear};
+use fa::{VFunction, Projection, Linear};
 use agents::PredictionAgent;
 use geometry::Space;
-use std::marker::PhantomData;


-pub struct GTD2<S: Space, V: VFunction<S> + Linear<S>>
+pub struct GTD2<S: Space, P: Projection<S>>
 {
-    v_func: V,
-    a_func: V,
+    v_func: Linear<S, P>,
+    a_func: Linear<S, P>,

     alpha: Parameter,
     beta: Parameter,
     gamma: Parameter,
-
-    phantom: PhantomData<S>,
 }

-impl<S: Space, V> GTD2<S, V>
-    where V: VFunction<S> + Linear<S>
-{
-    pub fn new<T1, T2, T3>(v_func: V, a_func: V,
+impl<S: Space, P: Projection<S>> GTD2<S, P> {
+    pub fn new<T1, T2, T3>(v_func: Linear<S, P>, a_func: Linear<S, P>,
                            alpha: T1, beta: T2, gamma: T3) -> Self
         where T1: Into<Parameter>,
               T2: Into<Parameter>,

@@ -37,18 +32,16 @@ impl<S: Space, V> GTD2<S, V>
             alpha: alpha.into(),
             beta: beta.into(),
             gamma: gamma.into(),
-
-            phantom: PhantomData,
         }
     }
 }

 impl<S: Space, V> PredictionAgent<S> for GTD2<S, V>
-    where V: VFunction<S> + Linear<S>
+    where V: VFunction<S> + Projection<S>
 {
     fn handle_transition(&mut self, s: &S::Repr, ns: &S::Repr, r: f64) -> Option<f64> {
-        let phi_s = self.v_func.phi(s);
-        let phi_ns = self.v_func.phi(ns);
+        let phi_s = self.v_func.project(s);
+        let phi_ns = self.v_func.project(ns);

         let td_error = r + self.gamma*self.v_func.evaluate_phi(&phi_ns) -
             self.v_func.evaluate_phi(&phi_s);

@@ -68,22 +61,18 @@ impl<S: Space, V> PredictionAgent<S> for GTD2<S, V>
 }


-pub struct TDC<S: Space, V: VFunction<S> + Linear<S>>
+pub struct TDC<S: Space, P: Projection<S>>
 {
-    v_func: V,
-    a_func: V,
+    v_func: Linear<S, P>,
+    a_func: Linear<S, P>,

     alpha: Parameter,
     beta: Parameter,
     gamma: Parameter,
-
-    phantom: PhantomData<S>,
 }

-impl<S: Space, V> TDC<S, V>
-    where V: VFunction<S> + Linear<S>
-{
-    pub fn new<T1, T2, T3>(v_func: V, a_func: V,
+impl<S: Space, P: Projection<S>> TDC<S, P> {
+    pub fn new<T1, T2, T3>(v_func: Linear<S, P>, a_func: Linear<S, P>,
                            alpha: T1, beta: T2, gamma: T3) -> Self
         where T1: Into<Parameter>,
               T2: Into<Parameter>,

@@ -100,18 +89,16 @@ impl<S: Space, V> TDC<S, V>
             alpha: alpha.into(),
             beta: beta.into(),
             gamma: gamma.into(),
-
-            phantom: PhantomData,
         }
     }
 }

 impl<S: Space, V> PredictionAgent<S> for TDC<S, V>
-    where V: VFunction<S> + Linear<S>
+    where V: VFunction<S> + Projection<S>
 {
     fn handle_transition(&mut self, s: &S::Repr, ns: &S::Repr, r: f64) -> Option<f64> {
-        let phi_s = self.v_func.phi(s);
-        let phi_ns = self.v_func.phi(ns);
+        let phi_s = self.v_func.project(s);
+        let phi_ns = self.v_func.project(ns);

         let td_error = r + self.gamma*self.v_func.evaluate_phi(&phi_ns) -
             self.v_func.evaluate_phi(&phi_s);
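The update lines elided below `td_error` presumably apply the two-timescale corrections; for context, the textbook GTD2 update (Sutton et al., 2009) is sketched here on plain slices, assuming rsrl follows the standard formulation (`theta` standing in for `v_func`, `w` for `a_func`). This is not the project's code.

```rust
// Textbook GTD2: theta are the value weights, w the auxiliary weights
// that track an estimate of the expected TD error.
fn gtd2_step(theta: &mut [f64], w: &mut [f64],
             phi_s: &[f64], phi_ns: &[f64],
             reward: f64, alpha: f64, beta: f64, gamma: f64) {
    fn dot(a: &[f64], b: &[f64]) -> f64 {
        a.iter().zip(b.iter()).map(|(x, y)| x * y).sum()
    }

    let td_error = reward + gamma * dot(theta, phi_ns) - dot(theta, phi_s);
    let estimate = dot(w, phi_s); // current estimate of the expected TD error

    for i in 0..theta.len() {
        theta[i] += alpha * estimate * (phi_s[i] - gamma * phi_ns[i]);
        w[i] += beta * (td_error - estimate) * phi_s[i];
    }
}
```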
27 changes: 10 additions & 17 deletions src/agents/prediction/td.rs

@@ -1,5 +1,5 @@
 use Parameter;
-use fa::{VFunction, Linear};
+use fa::{VFunction, Projection, Linear};
 use agents::PredictionAgent;
 use agents::memory::Trace;
 use geometry::Space;

@@ -55,42 +55,35 @@ impl<S: Space, V> PredictionAgent<S> for TD<S, V>
 }


-pub struct TDLambda<S: Space, V: VFunction<S> + Linear<S>>
+pub struct TDLambda<S: Space, P: Projection<S>>
 {
-    v_func: V,
     trace: Trace,
+    v_func: Linear<S, P>,

     alpha: Parameter,
     gamma: Parameter,
-
-    phantom: PhantomData<S>,
 }

-impl<S: Space, V> TDLambda<S, V>
-    where V: VFunction<S> + Linear<S>
-{
-    pub fn new<T1, T2>(v_func: V, trace: Trace, alpha: T1, gamma: T2) -> Self
+impl<S: Space, P: Projection<S>> TDLambda<S, P> {
+    pub fn new<T1, T2>(trace: Trace, v_func: Linear<S, P>,
+                       alpha: T1, gamma: T2) -> Self
         where T1: Into<Parameter>,
               T2: Into<Parameter>
     {
         TDLambda {
-            v_func: v_func,
             trace: trace,
+            v_func: v_func,

             alpha: alpha.into(),
             gamma: gamma.into(),
-
-            phantom: PhantomData,
         }
     }
 }

-impl<S: Space, V> PredictionAgent<S> for TDLambda<S, V>
-    where V: VFunction<S> + Linear<S>
-{
+impl<S: Space, P: Projection<S>> PredictionAgent<S> for TDLambda<S, P> {
     fn handle_transition(&mut self, s: &S::Repr, ns: &S::Repr, r: f64) -> Option<f64> {
-        let phi_s = self.v_func.phi(s);
-        let phi_ns = self.v_func.phi(ns);
+        let phi_s = self.v_func.project(s);
+        let phi_ns = self.v_func.project(ns);

         self.trace.decay(self.gamma.value());
         self.trace.update(&phi_s);
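Beyond the rename to `project`, note that `TDLambda::new` now takes the trace before the approximator. The visible body decays the trace and then folds in `phi_s`; the full TD(λ) step this implies is sketched below with an accumulating trace and λ folded into the decay. Whether rsrl's `Trace::decay(gamma)` already multiplies by λ internally is not visible in this hunk, so treat the decay factor as an assumption.

```rust
// TD(lambda) with an accumulating eligibility trace, on plain slices.
// Illustrative only; rsrl's Parameter and Trace types differ.
fn td_lambda_step(theta: &mut [f64], trace: &mut [f64],
                  phi_s: &[f64], phi_ns: &[f64],
                  reward: f64, alpha: f64, gamma: f64, lambda: f64) {
    fn dot(a: &[f64], b: &[f64]) -> f64 {
        a.iter().zip(b.iter()).map(|(x, y)| x * y).sum()
    }

    let td_error = reward + gamma * dot(theta, phi_ns) - dot(theta, phi_s);

    for i in 0..trace.len() {
        trace[i] = gamma * lambda * trace[i] + phi_s[i]; // decay, then accumulate
        theta[i] += alpha * td_error * trace[i];         // update along the trace
    }
}
```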
