Merge branch 'master' into rf-logging

tspooner committed Oct 22, 2017
2 parents e0b7937 + 615891e commit 5c8a636

Showing 25 changed files with 603 additions and 957 deletions.
4 changes: 2 additions & 2 deletions Cargo.toml

@@ -11,11 +11,11 @@ gcc = "0.3"

 [dependencies]
 log = "0.3"
-blas = "0.15"
+blas = "0.18"
 libc = "0.2"
 rand = "0.3"
 slog = "2.0"
 time = "0.1"
-ndarray = "0.9"
+ndarray = "0.10"
 slog-term = "2.2"
 slog-async = "2.1"
2 changes: 1 addition & 1 deletion build.rs

@@ -2,7 +2,7 @@ extern crate gcc;


 fn main() {
-    gcc::Config::new()
+    gcc::Build::new()
         .file("extern/tiles.c")
         .include("extern")
         .compile("libtiles.a");
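This tracks an upstream rename: during the `gcc` crate's 0.3.x series the builder type `Config` was deprecated in favour of `Build`, with the builder methods themselves left unchanged, so the fix is a drop-in rename. (The crate was later republished under the name `cc`.)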
9 changes: 7 additions & 2 deletions examples/sandbox.rs

@@ -1,7 +1,10 @@
 extern crate rsrl;

 use rsrl::{run, Parameter, SerialExperiment, Evaluation};
-use rsrl::fa::linear::RBFNetwork;
+
+use rsrl::fa::Linear;
+use rsrl::fa::projection::RBFNetwork;
+
 use rsrl::agents::control::td::QSigma;
 use rsrl::domains::{Domain, MountainCar};
 use rsrl::policies::{Greedy, EpsilonGreedy};

@@ -28,7 +31,9 @@ fn main() {
     let aspace = domain.action_space();
     let n_actions: usize = aspace.span().into();

-    let q_func = RBFNetwork::new(domain.state_space().partitioned(8), n_actions);
+    let pr = RBFNetwork::from_space(domain.state_space().partitioned(8));
+    let q_func = Linear::new(pr, n_actions);
+
     let policy = EpsilonGreedy::new(aspace, Parameter::exponential(0.9, 0.01, 0.99));

     QSigma::new(q_func, policy, 0.05, 0.99, 0.2, 2)
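This change is the new function-approximation layout in a nutshell: rather than `RBFNetwork` acting as a Q-function itself, a basis (`Projection`) is constructed first and then wrapped in a generic `Linear` model. Below is a minimal self-contained sketch of that pattern with toy types standing in for rsrl's; only the names `Projection`, `Linear`, `project`, and `evaluate` are taken from the diff, everything else is invented for illustration.

```rust
// Illustrative mock of the Projection/Linear split; not the rsrl API.

trait Projection {
    // phi(s): map a raw state to a feature vector.
    fn project(&self, s: &[f64]) -> Vec<f64>;
    fn dim(&self) -> usize;
}

// Toy radial-basis projection over a 1-D state space.
struct Rbf {
    centres: Vec<f64>,
    width: f64,
}

impl Projection for Rbf {
    fn project(&self, s: &[f64]) -> Vec<f64> {
        self.centres
            .iter()
            .map(|c| (-((s[0] - c) / self.width).powi(2)).exp())
            .collect()
    }

    fn dim(&self) -> usize {
        self.centres.len()
    }
}

// Generic linear model over any projection: one weight vector per action.
struct Linear<P: Projection> {
    projector: P,
    weights: Vec<Vec<f64>>,
}

impl<P: Projection> Linear<P> {
    fn new(projector: P, n_actions: usize) -> Self {
        let d = projector.dim();
        Linear { projector: projector, weights: vec![vec![0.0; d]; n_actions] }
    }

    // Q(s, .) = W * phi(s)
    fn evaluate(&self, s: &[f64]) -> Vec<f64> {
        let phi = self.projector.project(s);
        self.weights
            .iter()
            .map(|w| w.iter().zip(phi.iter()).map(|(wi, pi)| wi * pi).sum())
            .collect()
    }
}

fn main() {
    let pr = Rbf { centres: vec![-1.0, 0.0, 1.0], width: 0.5 };
    let q_func = Linear::new(pr, 3); // three actions, as in MountainCar

    println!("{:?}", q_func.evaluate(&[0.3]));
}
```

The payoff is that the agents below can be generic over the projection type alone while sharing one concrete `Linear` implementation.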
39 changes: 18 additions & 21 deletions src/agents/control/gtd.rs

@@ -1,5 +1,5 @@
 use Parameter;
-use fa::{VFunction, QFunction, Linear};
+use fa::{Function, VFunction, QFunction, Projection, Linear};
 use agents::ControlAgent;
 use domains::Transition;
 use geometry::{Space, ActionSpace};

@@ -11,9 +11,9 @@ use std::marker::PhantomData;
 ///
 /// Maei, Hamid R., et al. "Toward off-policy learning control with function approximation."
 /// Proceedings of the 27th International Conference on Machine Learning (ICML-10). 2010.
-pub struct GreedyGQ<S: Space, Q: QFunction<S>, V: VFunction<S>, P: Policy> {
-    q_func: Q,
-    v_func: V,
+pub struct GreedyGQ<S: Space, M: Projection<S>, P: Policy> {
+    q_func: Linear<S, M>,
+    v_func: Linear<S, M>,

     policy: P,

@@ -24,13 +24,9 @@ pub struct GreedyGQ<S: Space, Q: QFunction<S>, V: VFunction<S>, P: Policy> {
     phantom: PhantomData<S>
 }

-impl<S: Space, Q, V, P> GreedyGQ<S, Q, V, P>
-    where Q: QFunction<S>,
-          V: VFunction<S>,
-          P: Policy
-{
-    pub fn new<T1, T2, T3>(q_func: Q, v_func: V, policy: P,
-                           alpha: T1, beta: T2, gamma: T3) -> Self
+impl<S: Space, M: Projection<S>, P: Policy> GreedyGQ<S, M, P> {
+    pub fn new<T1, T2, T3>(q_func: Linear<S, M>, v_func: Linear<S, M>,
+                           policy: P, alpha: T1, beta: T2, gamma: T3) -> Self
         where T1: Into<Parameter>,
               T2: Into<Parameter>,
               T3: Into<Parameter>

@@ -50,35 +46,36 @@ impl<S: Space, Q, V, P> GreedyGQ<S, Q, V, P>
     }
 }

-impl<S: Space, Q, V, P> ControlAgent<S, ActionSpace> for GreedyGQ<S, Q, V, P>
-    where Q: QFunction<S> + Linear<S>,
-          V: VFunction<S> + Linear<S>,
-          P: Policy
+impl<S: Space, M: Projection<S>, P: Policy> ControlAgent<S, ActionSpace> for GreedyGQ<S, M, P>
 {
     fn pi(&mut self, s: &S::Repr) -> usize {
-        self.policy.sample(self.q_func.evaluate(s).as_slice())
+        let qs: Vec<f64> = self.q_func.evaluate(s);
+
+        self.policy.sample(qs.as_slice())
     }

     fn evaluate_policy<T: Policy>(&self, p: &mut T, s: &S::Repr) -> usize {
-        p.sample(self.q_func.evaluate(s).as_slice())
+        let qs: Vec<f64> = self.q_func.evaluate(s);
+
+        p.sample(qs.as_slice())
     }

     fn handle_transition(&mut self, t: &Transition<S, ActionSpace>) {
         let a = t.action;
         let (s, ns) = (t.from.state(), t.to.state());

-        let phi_s = self.q_func.phi(s);
-        let phi_ns = self.q_func.phi(ns);
+        let phi_s = self.q_func.project(s);
+        let phi_ns = self.q_func.project(ns);

         let td_error = t.reward +
             self.q_func.evaluate_action_phi(&(self.gamma.value()*&phi_ns - &phi_s), a);
-        let td_estimate = self.v_func.evaluate(s);
+        let td_estimate: f64 = self.v_func.evaluate(s);

         let update_q = td_error*&phi_s - self.gamma*td_estimate*phi_ns;
         let update_v = (td_error - td_estimate)*phi_s;

         self.q_func.update_action_phi(&update_q, a, self.alpha.value());
-        self.v_func.update_phi(&update_v, self.alpha*self.beta);
+        VFunction::update_phi(&mut self.v_func, &update_v, self.alpha*self.beta);
    }

     fn handle_terminal(&mut self, _: &S::Repr) {
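For reference, the arithmetic in `handle_transition` is the GreedyGQ update of Maei et al. (2010). The same equations are sketched below on plain slices, restricted to the taken action's weight vector; this is an illustration of the math in the diff, not the rsrl implementation.

```rust
// GreedyGQ weight updates as in handle_transition above. w_q is the weight
// vector for the taken action; w_v holds the secondary (correction) weights.
fn greedy_gq_step(w_q: &mut [f64], w_v: &mut [f64],
                  phi_s: &[f64], phi_ns: &[f64],
                  reward: f64, alpha: f64, beta: f64, gamma: f64) {
    fn dot(a: &[f64], b: &[f64]) -> f64 {
        a.iter().zip(b.iter()).map(|(x, y)| x * y).sum()
    }

    // td_error = r + Q evaluated on gamma*phi_ns - phi_s, i.e. r + gamma*Q(ns) - Q(s)
    let td_error = reward + gamma * dot(w_q, phi_ns) - dot(w_q, phi_s);
    // td_estimate = V(s), the gradient-correction estimate
    let td_estimate = dot(w_v, phi_s);

    for i in 0..w_q.len() {
        // update_q = td_error*phi_s - gamma*td_estimate*phi_ns, scaled by alpha
        w_q[i] += alpha * (td_error * phi_s[i] - gamma * td_estimate * phi_ns[i]);
        // update_v = (td_error - td_estimate)*phi_s, scaled by alpha*beta
        w_v[i] += alpha * beta * (td_error - td_estimate) * phi_s[i];
    }
}
```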
4 changes: 2 additions & 2 deletions src/agents/memory.rs

@@ -34,11 +34,11 @@ impl Trace {

     pub fn update(&mut self, phi: &Array1<f64>) {
         match self {
-            &mut Trace::Accumulating { ref mut eligibility, lambda } =>
+            &mut Trace::Accumulating { ref mut eligibility, .. } =>
             {
                 *eligibility += phi;
             },
-            &mut Trace::Replacing { ref mut eligibility, lambda } =>
+            &mut Trace::Replacing { ref mut eligibility, .. } =>
             {
                 eligibility.zip_mut_with(phi, |val, &p| {
                     *val = f64::min(1.0, *val + p);
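The only change here is in the patterns: `lambda` was bound but never used inside `update`, so it is replaced with `..`, which ignores all remaining fields. A tiny self-contained illustration of the same pattern follows (toy types; the real `Trace` stores `ndarray::Array1<f64>` eligibilities, as the diff shows).

```rust
// Struct-variant patterns can name just the fields they use; `..` skips
// the rest and avoids an unused-variable warning for `lambda`.
#[allow(dead_code)]
enum Trace {
    Accumulating { eligibility: Vec<f64>, lambda: f64 },
    Replacing { eligibility: Vec<f64>, lambda: f64 },
}

fn update(trace: &mut Trace, phi: &[f64]) {
    match trace {
        &mut Trace::Accumulating { ref mut eligibility, .. } => {
            for (e, p) in eligibility.iter_mut().zip(phi.iter()) {
                *e += *p;
            }
        },
        &mut Trace::Replacing { ref mut eligibility, .. } => {
            for (e, p) in eligibility.iter_mut().zip(phi.iter()) {
                *e = f64::min(1.0, *e + *p);
            }
        },
    }
}
```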
47 changes: 17 additions & 30 deletions src/agents/prediction/gtd.rs

@@ -1,26 +1,21 @@
 use Parameter;
-use fa::{VFunction, Linear};
+use fa::{VFunction, Projection, Linear};
 use agents::PredictionAgent;
 use geometry::Space;
-use std::marker::PhantomData;


-pub struct GTD2<S: Space, V: VFunction<S> + Linear<S>>
+pub struct GTD2<S: Space, P: Projection<S>>
 {
-    v_func: V,
-    a_func: V,
+    v_func: Linear<S, P>,
+    a_func: Linear<S, P>,

     alpha: Parameter,
     beta: Parameter,
     gamma: Parameter,
-
-    phantom: PhantomData<S>,
 }

-impl<S: Space, V> GTD2<S, V>
-    where V: VFunction<S> + Linear<S>
-{
-    pub fn new<T1, T2, T3>(v_func: V, a_func: V,
+impl<S: Space, P: Projection<S>> GTD2<S, P> {
+    pub fn new<T1, T2, T3>(v_func: Linear<S, P>, a_func: Linear<S, P>,
                            alpha: T1, beta: T2, gamma: T3) -> Self
         where T1: Into<Parameter>,
               T2: Into<Parameter>,

@@ -37,18 +32,16 @@ impl<S: Space, V> GTD2<S, V>
             alpha: alpha.into(),
             beta: beta.into(),
             gamma: gamma.into(),
-
-            phantom: PhantomData,
         }
     }
 }

 impl<S: Space, V> PredictionAgent<S> for GTD2<S, V>
-    where V: VFunction<S> + Linear<S>
+    where V: VFunction<S> + Projection<S>
 {
     fn handle_transition(&mut self, s: &S::Repr, ns: &S::Repr, r: f64) -> Option<f64> {
-        let phi_s = self.v_func.phi(s);
-        let phi_ns = self.v_func.phi(ns);
+        let phi_s = self.v_func.project(s);
+        let phi_ns = self.v_func.project(ns);

         let td_error = r + self.gamma*self.v_func.evaluate_phi(&phi_ns) -
             self.v_func.evaluate_phi(&phi_s);

@@ -68,22 +61,18 @@ impl<S: Space, V> PredictionAgent<S> for GTD2<S, V>
 }


-pub struct TDC<S: Space, V: VFunction<S> + Linear<S>>
+pub struct TDC<S: Space, P: Projection<S>>
 {
-    v_func: V,
-    a_func: V,
+    v_func: Linear<S, P>,
+    a_func: Linear<S, P>,

     alpha: Parameter,
     beta: Parameter,
     gamma: Parameter,
-
-    phantom: PhantomData<S>,
 }

-impl<S: Space, V> TDC<S, V>
-    where V: VFunction<S> + Linear<S>
-{
-    pub fn new<T1, T2, T3>(v_func: V, a_func: V,
+impl<S: Space, P: Projection<S>> TDC<S, P> {
+    pub fn new<T1, T2, T3>(v_func: Linear<S, P>, a_func: Linear<S, P>,
                            alpha: T1, beta: T2, gamma: T3) -> Self
         where T1: Into<Parameter>,
               T2: Into<Parameter>,

@@ -100,18 +89,16 @@ impl<S: Space, V> TDC<S, V>
             alpha: alpha.into(),
             beta: beta.into(),
             gamma: gamma.into(),
-
-            phantom: PhantomData,
         }
     }
 }

 impl<S: Space, V> PredictionAgent<S> for TDC<S, V>
-    where V: VFunction<S> + Linear<S>
+    where V: VFunction<S> + Projection<S>
 {
     fn handle_transition(&mut self, s: &S::Repr, ns: &S::Repr, r: f64) -> Option<f64> {
-        let phi_s = self.v_func.phi(s);
-        let phi_ns = self.v_func.phi(ns);
+        let phi_s = self.v_func.project(s);
+        let phi_ns = self.v_func.project(ns);

         let td_error = r + self.gamma*self.v_func.evaluate_phi(&phi_ns) -
             self.v_func.evaluate_phi(&phi_s);
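The update lines elided below `td_error` presumably apply the two-timescale corrections; for context, the textbook GTD2 update (Sutton et al., 2009) is sketched here on plain slices, assuming rsrl follows the standard formulation (`theta` standing in for `v_func`, `w` for `a_func`). This is not the project's code.

```rust
// Textbook GTD2: theta are the value weights, w the auxiliary weights
// that track an estimate of the expected TD error.
fn gtd2_step(theta: &mut [f64], w: &mut [f64],
             phi_s: &[f64], phi_ns: &[f64],
             reward: f64, alpha: f64, beta: f64, gamma: f64) {
    fn dot(a: &[f64], b: &[f64]) -> f64 {
        a.iter().zip(b.iter()).map(|(x, y)| x * y).sum()
    }

    let td_error = reward + gamma * dot(theta, phi_ns) - dot(theta, phi_s);
    let estimate = dot(w, phi_s); // current estimate of the expected TD error

    for i in 0..theta.len() {
        theta[i] += alpha * estimate * (phi_s[i] - gamma * phi_ns[i]);
        w[i] += beta * (td_error - estimate) * phi_s[i];
    }
}
```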
27 changes: 10 additions & 17 deletions src/agents/prediction/td.rs

@@ -1,5 +1,5 @@
 use Parameter;
-use fa::{VFunction, Linear};
+use fa::{VFunction, Projection, Linear};
 use agents::PredictionAgent;
 use agents::memory::Trace;
 use geometry::Space;

@@ -55,42 +55,35 @@ impl<S: Space, V> PredictionAgent<S> for TD<S, V>
 }


-pub struct TDLambda<S: Space, V: VFunction<S> + Linear<S>>
+pub struct TDLambda<S: Space, P: Projection<S>>
 {
-    v_func: V,
     trace: Trace,
+    v_func: Linear<S, P>,

     alpha: Parameter,
     gamma: Parameter,
-
-    phantom: PhantomData<S>,
 }

-impl<S: Space, V> TDLambda<S, V>
-    where V: VFunction<S> + Linear<S>
-{
-    pub fn new<T1, T2>(v_func: V, trace: Trace, alpha: T1, gamma: T2) -> Self
+impl<S: Space, P: Projection<S>> TDLambda<S, P> {
+    pub fn new<T1, T2>(trace: Trace, v_func: Linear<S, P>,
+                       alpha: T1, gamma: T2) -> Self
         where T1: Into<Parameter>,
               T2: Into<Parameter>
     {
         TDLambda {
-            v_func: v_func,
             trace: trace,
+            v_func: v_func,

             alpha: alpha.into(),
             gamma: gamma.into(),
-
-            phantom: PhantomData,
         }
     }
 }

-impl<S: Space, V> PredictionAgent<S> for TDLambda<S, V>
-    where V: VFunction<S> + Linear<S>
-{
+impl<S: Space, P: Projection<S>> PredictionAgent<S> for TDLambda<S, P> {
     fn handle_transition(&mut self, s: &S::Repr, ns: &S::Repr, r: f64) -> Option<f64> {
-        let phi_s = self.v_func.phi(s);
-        let phi_ns = self.v_func.phi(ns);
+        let phi_s = self.v_func.project(s);
+        let phi_ns = self.v_func.project(ns);

         self.trace.decay(self.gamma.value());
         self.trace.update(&phi_s);
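Beyond the rename to `project`, note that `TDLambda::new` now takes the trace before the approximator. The visible body decays the trace and then folds in `phi_s`; the full TD(λ) step this implies is sketched below with an accumulating trace and λ folded into the decay. Whether rsrl's `Trace::decay(gamma)` already multiplies by λ internally is not visible in this hunk, so treat the decay factor as an assumption.

```rust
// TD(lambda) with an accumulating eligibility trace, on plain slices.
// Illustrative only; rsrl's Parameter and Trace types differ.
fn td_lambda_step(theta: &mut [f64], trace: &mut [f64],
                  phi_s: &[f64], phi_ns: &[f64],
                  reward: f64, alpha: f64, gamma: f64, lambda: f64) {
    fn dot(a: &[f64], b: &[f64]) -> f64 {
        a.iter().zip(b.iter()).map(|(x, y)| x * y).sum()
    }

    let td_error = reward + gamma * dot(theta, phi_ns) - dot(theta, phi_s);

    for i in 0..trace.len() {
        trace[i] = gamma * lambda * trace[i] + phi_s[i]; // decay, then accumulate
        theta[i] += alpha * td_error * trace[i];         // update along the trace
    }
}
```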
