Adding test, fleshing out train. checkpoint.

wg-perception · Sep 16, 2011 · 8f8ca9e · 8f8ca9e
1 parent a60041d
commit 8f8ca9e
Show file tree

Hide file tree

Showing 8 changed files with 248 additions and 21 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,4 @@
+*.mmod
 build
 .cproject
 .project

diff --git a/modules/CMakeLists.txt b/modules/CMakeLists.txt
@@ -12,6 +12,7 @@ ecto_python_env_gen(${CMAKE_BINARY_DIR}/lib)
 ectomodule(mmod
  module.cpp
  MModTrainer.cpp
+ MModTester.cpp
  MModPersister.cpp
 )
 

diff --git a/modules/MModPersister.cpp b/modules/MModPersister.cpp
@@ -1,6 +1,7 @@
 #include <ecto/ecto.hpp>
 #include <opencv2/core/core.hpp>
 #include <boost/archive/text_oarchive.hpp>
+#include <fstream>
 
 #include "mmod_objects.h"  //For train and test
 #include "mmod_color.h"    //For depth and color processing (yes, I should change the name)
@@ -13,38 +14,47 @@ namespace mmod
     static void
     declare_params(tendrils& p)
     {
+      p.declare<std::string> ("filename", "Output file name to save training to.");
     }
 
     static void
     declare_io(const tendrils& p, tendrils& i, tendrils& o)
     {
-      i.declare<mmod_objects>("templates");
-      o.declare<std::string>("data", "Data string.");
-      o.declare<std::string>("mime", "Mime type", "application/octet-stream");
+      i.declare<mmod_objects> ("templates");
+
+      //TODO output the serialized data.
+      //      o.declare<std::string>("data", "Data string.");
+      //      o.declare<std::string>("mime", "Mime type", "application/octet-stream");
     }
 
     void
     configure(const tendrils& p, const tendrils& i, const tendrils& o)
     {
       //outputs
-      trainer_ = i["templates"];
-      data_ = o["data"];
-      mime_ = o["mime"];
+      templates_ = i["templates"];
+      //TODO outputs
+      //      data_ = o["data"];
+      //      mime_ = o["mime"];
+      //parameters
+      p["filename"] >> filename_;
     }
 
     int
     process(const tendrils& i, const tendrils& o)
     {
-      std::stringstream ss;
-      boost::archive::text_oarchive oa(ss);
-      ss << trainer_;
-      *data_ = ss.str();
+      //serialize the templates to a file on disk for now.
+      std::ofstream file(filename_.c_str());
+      boost::archive::text_oarchive oa(file);
+      oa << *templates_;
       return ecto::OK;
     }
     //inputs
-    spore<mmod_objects> trainer_; //Object train and test.
-    //outputs
-    spore<std::string> mime_, data_;
+    spore<mmod_objects> templates_; //Object train and test.
+    //outputs TODO
+    //    spore<std::string> mime_, data_;
+
+    std::string filename_;
   };
 }
-ECTO_CELL(mmod, mmod::MModPersister, "MModPersister", "An mmod template persister.");
+ECTO_CELL(mmod, mmod::MModPersister, "MModPersister", "An mmod template persister.")
+;
diff --git a/modules/MModTester.cpp b/modules/MModTester.cpp
@@ -0,0 +1,123 @@
+#include <ecto/ecto.hpp>
+#include <opencv2/core/core.hpp>
+#include <boost/archive/text_iarchive.hpp>
+#include <fstream>
+
+#include "mmod_objects.h"  //For train and test
+#include "mmod_color.h"    //For depth and color processing (yes, I should change the name)
+namespace mmod
+{
+  using ecto::tendrils;
+  using ecto::spore;
+  struct MModTester
+  {
+    static void
+    declare_params(tendrils& p)
+    {
+      p.declare<std::string> ("filename", "Output file name to save training to.");
+
+      p.declare<float> ("thresh_learn", "The threshold for learning a new template", 0.8);
+      p.declare<float> ("thresh_match", "The threshold for learning a new template", 0.85);
+      p.declare<float> (
+                        "frac_overlap",
+                        "the fraction of overlap between 2 above threshold feature's bounding box rectangles that constitutes 'overlap'",
+                        0.6);
+      p.declare<int> ("skip_x", "Control sparse testing of the feature images", 2);
+      p.declare<int> ("skip_y", "Control sparse testing of the feature images", 2);
+    }
+
+    static void
+    declare_io(const tendrils& p, tendrils& i, tendrils& o)
+    {
+      i.declare<cv::Mat> ("image", "An image. BGR image of type CV_8UC3").required(true);
+      i.declare<cv::Mat> ("depth", "Depth image of type CV_16UC1").required(true);
+      i.declare<cv::Mat> ("mask", "Object mask of type CV_8UC1 or CV_8UC3").required(false);
+      o.declare<cv::Mat> ("debug_image", "Debug image.");
+    }
+
+    void
+    configure(const tendrils& p, const tendrils& i, const tendrils& o)
+    {
+      std::string filename;
+      //parameters
+      p["filename"] >> filename;
+      thresh_match_ = p["thresh_match"];
+      frac_overlap_ = p["frac_overlap"];
+      skip_x_ = p["skip_x"];
+      skip_y_ = p["skip_y"];
+
+      //deserialize from file.
+      std::ifstream file(filename.c_str());
+      boost::archive::text_iarchive ia(file);
+      ia >> templates_;
+
+      // inputs
+      image_ = i["image"];
+      mask_ = i["mask"];
+      depth_ = i["depth"];
+
+      //outputs
+      debug_image_ = o["debug_image"];
+    }
+
+    int
+    process(const tendrils& i, const tendrils& o)
+    {
+      //input spores are like smart pointers; dereference them to get at the
+      //underlying data type.
+      cv::Mat image = *image_, mask = *mask_;
+
+      cv::Mat depth;
+      if (depth_->type() == CV_32FC1)
+      {
+        depth_->convertTo(depth, CV_16U, 1 / 1000.0);
+      }
+      else if (depth_->type() == CV_16UC1)
+      {
+        depth = *depth_;
+      }
+      else
+      {
+        throw std::logic_error(
+                               "You must supply us with either a CV_32FC1 or CV_16UC1 depth map. Floating point in meters, fixed in mm.");
+      }
+      //run detections.
+      //TEST (note that you can also use match_all_objects_at_a_point(...)):
+      calcColor.computeColorWTA(image, colorfeat, mask);
+      calcDepth.computeDepthWTA(depth, depthfeat, mask);
+      FeatModes.clear();
+      FeatModes.push_back(colorfeat);
+      FeatModes.push_back(depthfeat);
+
+      templates_.match_all_objects(FeatModes, modesCD, mask, *thresh_match_, *frac_overlap_, *skip_x_, *skip_y_);
+
+      //TO DISPLAY MATCHES (NON-MAX SUPPRESSED)
+      cv::Mat debug_image;
+      image.copyTo(debug_image);
+      templates_.draw_matches(debug_image); //draw results...
+
+      *debug_image_ = debug_image;
+      return ecto::OK;
+    }
+
+    colorwta calcColor; //Feature processing
+    depthwta calcDepth; //Feature processing
+    std::vector<cv::Mat> FeatModes; //List of images
+    std::vector<std::string> modesCD; //Names of modes (color and depth)
+    cv::Mat colorfeat, depthfeat; //To hold feature outputs. These will be CV_8UC1 images
+
+    mmod_objects templates_; //Object train and test.
+
+    //params
+    spore<float> thresh_match_, frac_overlap_;
+    spore<int> skip_x_, skip_y_;
+
+    //inputs
+    spore<cv::Mat> image_, mask_, depth_;
+
+    //outputs
+    spore<cv::Mat> debug_image_;
+  };
+}
+ECTO_CELL(mmod, mmod::MModTester, "MModTester", "An mmod template detector.")
+;
diff --git a/modules/MModTrainer.cpp b/modules/MModTrainer.cpp
@@ -82,8 +82,7 @@ namespace mmod
       FeatModes.push_back(depthfeat);
 
       //LEARN A TEMPLATE (for now, it will slow down with each view learned).
-      std::string sT1("ST1"), oT("T"), oX("X");
-      /*int num_templ = */
+      //the object_id and session_id are dynamically changed by python or otherwise.
       trainer_->learn_a_template(FeatModes, modesCD, *mask_, *session_id_, *object_id_, *frame_number_, *thresh_learn_);
       return ecto::OK;
     }

diff --git a/modules/test_mmod.py b/modules/test_mmod.py
@@ -0,0 +1,76 @@
+#!/usr/bin/env python
+
+import sys
+import argparse
+import time
+import tempfile
+import os
+import math
+import subprocess
+
+import couchdb
+
+import ecto
+from ecto_opencv import calib, highgui, imgproc
+import object_recognition
+from object_recognition import dbtools, models, capture, observations
+
+from mmod import MModTester
+
+def parse_args():
+    parser = argparse.ArgumentParser(description='Computes a surface mesh of an object in the database')
+    parser.add_argument('-i', '--training', metavar='TRAINING_FILE', dest='training', type=str, default='',
+                       help='The training file')
+    args = parser.parse_args()
+    if args.training == '':
+        parser.print_usage()
+        print 'You must supply a training file.'
+        sys.exit(1)
+    return args
+
+def kinect_highres(device_n=0):
+    from ecto_openni import Capture, ResolutionMode, Device
+    return Capture('ni device', rgb_resolution=ResolutionMode.SXGA_RES,
+                   depth_resolution=ResolutionMode.VGA_RES,
+                   rgb_fps=15, depth_fps=30,
+                   device_number=device_n,
+                   registration=True,
+                   synchronize=False,
+                   device=Device.KINECT
+                   )
+
+def test_mmod(args):
+    kinect_raw = kinect_highres()
+    kinect_cv = highgui.NiConverter('Kinect CV')
+    rescale_depth = capture.RescaledRegisteredDepth() #this is for SXGA mode scale handling.
+    plasm = ecto.Plasm()
+
+    #connect up the kinect as input
+    plasm.connect(
+        kinect_raw[:] >> kinect_cv[:],
+        kinect_cv['image'] >> rescale_depth['image'],
+        kinect_cv['depth'] >> rescale_depth['depth'],
+    )
+
+    #hook up the tester
+    mmod_tester = MModTester(filename=args.training)
+    plasm.connect(
+        kinect_cv['image'] >> mmod_tester['image'],
+        rescale_depth['depth'] >> mmod_tester['depth'],
+    )
+
+    #visualize raw data
+    fps = highgui.FPSDrawer()
+    plasm.connect(
+          kinect_cv['image'] >> fps[:],
+          fps[:] >> highgui.imshow('image', name='image')[:],
+          kinect_cv['depth'] >> highgui.imshow('depth', name='depth')[:],
+          mmod_tester['debug_image'] >> highgui.imshow('mmod debug', name='mmod depth')[:],
+
+          )
+    sched = ecto.schedulers.Singlethreaded(plasm)
+    sched.execute()
+
+if "__main__" == __name__:
+    args = parse_args()
+    test_mmod(args)
diff --git a/modules/train_mmod.py b/modules/train_mmod.py
@@ -21,6 +21,8 @@ def parse_args():
     parser = argparse.ArgumentParser(description='Computes a surface mesh of an object in the database')
     parser.add_argument('-s', '--session_id', metavar='SESSION_ID', dest='session_id', type=str, default='',
                        help='The session id to reconstruct.')
+    parser.add_argument('-o', '--output', metavar='OUTPUTFILE', dest='output', type=str, default='trained.mmod',
+                       help='The output file of training.')
     parser.add_argument('--all', dest='compute_all', action='store_const',
                         const=True, default=False,
                         help='Compute templates for all possible sessions.')
@@ -36,11 +38,10 @@ def parse_args():
     return args
 
 
-def train_mmod(obj_ids, args):
+def train_mmod(mmod_trainer, persister, obj_ids, args):
     db_reader = capture.ObservationReader('db_reader', db_url=args.db_root, collection='observations')
     observation_dealer = ecto.Dealer(typer=db_reader.inputs.at('observation'), iterable=obj_ids)
     db_reader = capture.ObservationReader('db_reader', db_url=args.db_root, collection='observations')
-    depthTo3d = calib.DepthTo3d()
     erode = imgproc.Erode(kernel=3) #-> 7x7
     rescale_depth = capture.RescaledRegisteredDepth() #this is for SXGA mode scale handling.
     plasm = ecto.Plasm()
@@ -52,14 +53,22 @@ def train_mmod(obj_ids, args):
         db_reader['mask'] >> erode['image'],
     )
 
+    print "Training session_id:", str(session.id), " object_id:", str(session.object_id)
+    #set the session id and the object id for this trainer.
+    mmod_trainer.params.session_id = str(session.id)
+    mmod_trainer.params.object_id = str(session.object_id)
     #hook up the trainer
-    mmod_trainer = MModTrainer(session_id=session.id, object_id=str(session.object_id))
     plasm.connect(
         erode['image'] >> mmod_trainer['mask'],
         db_reader['image'] >> mmod_trainer['image'],
         rescale_depth['depth'] >> mmod_trainer['depth'],
         db_reader['frame_number'] >> mmod_trainer['frame_number'],
     )
+
+    #persistence
+    persisert_if = ecto.If(cell=persister) #this is so it never runs.
+    #only connect the If, not the persister.
+    plasm.connect(mmod_trainer['templates'] >> persisert_if['templates'])
 
     if args.visualize:
         plasm.connect(
@@ -79,16 +88,23 @@ def train_mmod(obj_ids, args):
 
     sessions = dbs['sessions']
     observations = dbs['observations']
+
+    mmod_trainer = MModTrainer(session_id='NA', object_id='NA')
+    persister = MModPersister(filename="trained_all.mmod")
+
     if args.compute_all:
         results = models.Session.all(sessions)
         for session in results:
             obs_ids = models.find_all_observations_for_session(observations, session.id)
-            train_mmod(obs_ids, args)
+            train_mmod(mmod_trainer, persister, obs_ids, args)
 
     else:
         session = models.Session.load(sessions, args.session_id)
         if session == None or session.id == None:
             print "Could not load session with id:", args.session_id
             sys.exit(1)
         obs_ids = models.find_all_observations_for_session(observations, session.id)
-        train_mmod(obs_ids, args)
+        train_mmod(mmod_trainer, persister, obs_ids, args)
+
+    #the inputs should still be valid, so just thunk process once to save to disk
+    persister.process()
diff --git a/src/mmod_objects.cpp b/src/mmod_objects.cpp
@@ -52,6 +52,7 @@ using namespace std;
 		vector<string>::iterator ii;		//Object ID iterator (object names)
 		vector<vector<int> >::iterator fitr;//Feature indices iterator
 		int len = (int)rv.size();
+		if(rv.empty()) len = 1;
 		int Dcolor = 150/len;
 		Scalar color(255,255,255);
 		string stringscore;