Skip to content

Commit

Permalink
scale support for kcf + r,g,b color channels use as features
Browse files Browse the repository at this point in the history
  • Loading branch information
Tomas Vojir committed Aug 6, 2016
1 parent 733d5fb commit 8d7f23b
Show file tree
Hide file tree
Showing 4 changed files with 187 additions and 54 deletions.
4 changes: 3 additions & 1 deletion .gitignore
@@ -1,4 +1,6 @@
build
build*
.idea
main_test.cpp
src/complexmat_cv.hpp
*TODO*

8 changes: 2 additions & 6 deletions main_vot.cpp
Expand Up @@ -16,13 +16,9 @@ int main()
vot_io.outputBoundingBox(init_rect);
vot_io.getNextImage(image);

BBox_c bb;
bb.cx = init_rect.x + init_rect.width/2.;
bb.cy = init_rect.y + init_rect.height/2.;
bb.w = init_rect.width;
bb.h = init_rect.height;
tracker.init(image, bb);
tracker.init(image, init_rect);

BBox_c bb;
double avg_time = 0.;
int frames = 0;
while (vot_io.getNextImage(image) == 1){
Expand Down
208 changes: 165 additions & 43 deletions src/kcf.cpp
@@ -1,38 +1,87 @@
#include "kcf.h"
#include <numeric>

void KCF_Tracker::init(cv::Mat &img, BBox_c &bbox)
void KCF_Tracker::init(cv::Mat &img, const cv::Rect & bbox)
{
p_pose = bbox;
//check boundary, enforce min size
double x1 = bbox.x, x2 = bbox.x + bbox.width, y1 = bbox.y, y2 = bbox.y + bbox.height;
if (x1 < 0) x1 = 0.;
if (x2 > img.cols-1) x2 = img.cols - 1;
if (y1 < 0) y1 = 0;
if (y2 > img.rows-1) y2 = img.rows - 1;

if (x2-x1 < 2*p_cell_size) {
double diff = (2*p_cell_size -x2+x1)/2.;
if (x1 - diff >= 0 && x2 + diff < img.cols){
x1 -= diff;
x2 += diff;
} else if (x1 - 2*diff >= 0) {
x1 -= 2*diff;
} else {
x2 += 2*diff;
}
}
if (y2-y1 < 2*p_cell_size) {
double diff = (2*p_cell_size -y2+y1)/2.;
if (y1 - diff >= 0 && y2 + diff < img.rows){
y1 -= diff;
y2 += diff;
} else if (y1 - 2*diff >= 0) {
y1 -= 2*diff;
} else {
y2 += 2*diff;
}
}

cv::Mat input;
p_pose.w = x2-x1;
p_pose.h = y2-y1;
p_pose.cx = x1 + p_pose.w/2.;
p_pose.cy = y1 + p_pose.h/2.;

cv::Mat input_gray, input_rgb = img.clone();
if (img.channels() == 3){
cv::cvtColor(img, input, CV_BGR2GRAY);
input.convertTo(input, CV_32FC1);
cv::cvtColor(img, input_gray, CV_BGR2GRAY);
input_gray.convertTo(input_gray, CV_32FC1);
}else
img.convertTo(input, CV_32FC1);
img.convertTo(input_gray, CV_32FC1);

// don't need too large image
if (bbox.w*bbox.h > 100.*100.) {
// NOTE : NEVER SCALE DOWN FOR NOW
if (p_pose.w * p_pose.h > 100.*100.) {
std::cout << "resizing image by factor of 2" << std::endl;
p_resize_image = true;
p_pose.scale(0.5);
cv::resize(input, input, cv::Size(0,0), 0.5, 0.5, cv::INTER_CUBIC);
cv::resize(input_gray, input_gray, cv::Size(0,0), 0.5, 0.5, cv::INTER_AREA);
cv::resize(input_rgb, input_rgb, cv::Size(0,0), 0.5, 0.5, cv::INTER_AREA);
}

p_windows_size[0] = floor(p_pose.w * (1. + p_padding));
p_windows_size[1] = floor(p_pose.h * (1. + p_padding));
//compute win size + fit to fhog cell size
p_windows_size[0] = round(p_pose.w * (1. + p_padding) / p_cell_size) * p_cell_size;
p_windows_size[1] = round(p_pose.h * (1. + p_padding) / p_cell_size) * p_cell_size;

if (m_use_scale)
for (int i = -p_num_scales/2; i < p_num_scales/2; ++i)
p_scales.push_back(std::pow(p_scale_step, i));
else
p_scales.push_back(1.);

p_current_scale = 1.;
double min_size_ratio = std::max(2.*p_cell_size/p_windows_size[0], 2.*p_cell_size/p_windows_size[1]);
double max_size_ratio = std::min(floor(img.cols/p_cell_size)*p_cell_size/p_windows_size[0], floor(img.rows/p_cell_size)*p_cell_size/p_windows_size[1]);
p_min_max_scale[0] = std::pow(p_scale_step, std::ceil(std::log(min_size_ratio) / log(p_scale_step)));
p_min_max_scale[1] = std::pow(p_scale_step, std::floor(std::log(max_size_ratio) / log(p_scale_step)));

std::cout << " min max scales: " << p_min_max_scale[0] << " " << p_min_max_scale[1] << std::endl;

p_output_sigma = std::sqrt(p_pose.w*p_pose.h) * p_output_sigma_factor / static_cast<double>(p_cell_size);

//window weights, i.e. labels
p_yf = fft2(gaussian_shaped_labels(p_output_sigma, p_windows_size[0]/p_cell_size, p_windows_size[1]/p_cell_size));

p_cos_window = cosine_window_function(p_yf.cols, p_yf.rows);

//obtain a sub-window for training initial model
cv::Mat patch = get_subwindow(input, p_pose.cx, p_pose.cy, p_windows_size[0], p_windows_size[1]);
p_model_xf = fft2(p_fhog.extract(patch, 2, p_cell_size, 9), p_cos_window);
std::vector<cv::Mat> path_feat = get_features(input_rgb, input_gray, p_pose.cx, p_pose.cy, p_windows_size[0], p_windows_size[1]);
p_model_xf = fft2(path_feat, p_cos_window);
//Kernel Ridge Regression, calculate alphas (in Fourier domain)
ComplexMat kf = gaussian_correlation(p_model_xf, p_model_xf, p_kernel_sigma, true);

Expand All @@ -45,7 +94,7 @@ void KCF_Tracker::init(cv::Mat &img, BBox_c &bbox)

void KCF_Tracker::setTrackerPose(BBox_c &bbox, cv::Mat & img)
{
init(img, bbox);
init(img, bbox.get_rect());
}

void KCF_Tracker::updateTrackerPosition(BBox_c &bbox)
Expand Down Expand Up @@ -73,43 +122,71 @@ BBox_c KCF_Tracker::getBBox()

void KCF_Tracker::track(cv::Mat &img)
{
cv::Mat input;
cv::Mat input_gray, input_rgb = img.clone();
if (img.channels() == 3){
cv::cvtColor(img, input, CV_BGR2GRAY);
input.convertTo(input, CV_32FC1);
cv::cvtColor(img, input_gray, CV_BGR2GRAY);
input_gray.convertTo(input_gray, CV_32FC1);
}else
img.convertTo(input, CV_32FC1);
img.convertTo(input_gray, CV_32FC1);

// don't need too large image
if (p_resize_image)
cv::resize(input, input, cv::Size(0,0), 0.5, 0.5, cv::INTER_CUBIC);

cv::Mat patch = get_subwindow(input, p_pose.cx, p_pose.cy, p_windows_size[0], p_windows_size[1]);
ComplexMat zf = fft2(p_fhog.extract(patch, 2, p_cell_size, 9), p_cos_window);
ComplexMat kzf = gaussian_correlation(zf, p_model_xf, p_kernel_sigma);
cv::Mat response = ifft2(p_model_alphaf*kzf);
//std::cout << response << std::endl;

/* target location is at the maximum response. we must take into
account the fact that, if the target doesn't move, the peak
will appear at the top-left corner, not at the center (this is
discussed in the paper). the responses wrap around cyclically. */
double min_val, max_val;
cv::Point2i min_loc, max_loc;
cv::minMaxLoc(response, &min_val, &max_val, &min_loc, &max_loc);

if (max_loc.y > zf.rows/2) //wrap around to negative half-space of vertical axis
max_loc.y = max_loc.y - zf.rows;
if (max_loc.x > zf.cols/2) //same for horizontal axis
max_loc.x = max_loc.x - zf.cols;
if (p_resize_image) {
cv::resize(input_gray, input_gray, cv::Size(0, 0), 0.5, 0.5, cv::INTER_AREA);
cv::resize(input_rgb, input_rgb, cv::Size(0, 0), 0.5, 0.5, cv::INTER_AREA);
}

std::vector<cv::Mat> patch_feat;
int max_x = -1, max_y = -1;
double max_response = -1.;
int scale_index = 0;
for (size_t i = 0; i < p_scales.size(); ++i) {
patch_feat = get_features(input_rgb, input_gray, p_pose.cx, p_pose.cy, p_windows_size[0], p_windows_size[1], p_current_scale*p_scales[i]);
ComplexMat zf = fft2(patch_feat, p_cos_window);
ComplexMat kzf = gaussian_correlation(zf, p_model_xf, p_kernel_sigma);
cv::Mat response = ifft2(p_model_alphaf * kzf);
//std::cout << response << std::endl;

/* target location is at the maximum response. we must take into
account the fact that, if the target doesn't move, the peak
will appear at the top-left corner, not at the center (this is
discussed in the paper). the responses wrap around cyclically. */
double min_val, max_val;
cv::Point2i min_loc, max_loc;
cv::minMaxLoc(response, &min_val, &max_val, &min_loc, &max_loc);

if (max_loc.y > zf.rows / 2) //wrap around to negative half-space of vertical axis
max_loc.y = max_loc.y - zf.rows;
if (max_loc.x > zf.cols / 2) //same for horizontal axis
max_loc.x = max_loc.x - zf.cols;

if (max_val > max_response){
max_response = max_val;
max_x = max_loc.x;
max_y = max_loc.y;
scale_index = i;
}
}

//shift bbox, no scale change
p_pose.cx += p_cell_size * max_loc.x;
p_pose.cy += p_cell_size * max_loc.y;
p_current_scale *= p_scales[scale_index];
if (p_current_scale < p_min_max_scale[0])
p_current_scale = p_min_max_scale[0];
if (p_current_scale > p_min_max_scale[1])
p_current_scale = p_min_max_scale[1];

p_pose.cx += p_cell_size * max_x;
p_pose.cy += p_cell_size * max_y;
if (p_pose.cx < 0) p_pose.cx = 0;
if (p_pose.cx > img.cols-1) p_pose.cx = img.cols-1;
if (p_pose.cy < 0) p_pose.cy = 0;
if (p_pose.cy > img.rows-1) p_pose.cy = img.rows-1;

p_pose.w *= p_scales[scale_index];
p_pose.h *= p_scales[scale_index];

//obtain a subwindow for training at newly estimated target position
patch = get_subwindow(input, p_pose.cx, p_pose.cy, p_windows_size[0], p_windows_size[1]);
ComplexMat xf = fft2(p_fhog.extract(patch, 2, p_cell_size, 9), p_cos_window);
patch_feat = get_features(input_rgb, input_gray, p_pose.cx, p_pose.cy, p_windows_size[0], p_windows_size[1], p_current_scale);
ComplexMat xf = fft2(patch_feat, p_cos_window);
//Kernel Ridge Regression, calculate alphas (in Fourier domain)
ComplexMat kf = gaussian_correlation(xf, xf, p_kernel_sigma, true);

Expand All @@ -128,6 +205,51 @@ void KCF_Tracker::track(cv::Mat &img)

// ****************************************************************************

std::vector<cv::Mat> KCF_Tracker::get_features(cv::Mat & input_rgb, cv::Mat & input_gray, int cx, int cy, int size_x, int size_y, double scale)
{
int size_x_scaled = floor(size_x*scale);
int size_y_scaled = floor(size_y*scale);

cv::Mat patch_gray = get_subwindow(input_gray, cx, cy, size_x_scaled, size_y_scaled);
cv::Mat patch_rgb = get_subwindow(input_rgb, cx, cy, size_x_scaled, size_y_scaled);

//resize to default size
if (scale > 1.){
//if we downsample use INTER_AREA interpolation
cv::resize(patch_gray, patch_gray, cv::Size(size_x, size_y), 0., 0., cv::INTER_AREA);
}else {
cv::resize(patch_gray, patch_gray, cv::Size(size_x, size_y), 0., 0., cv::INTER_LINEAR);
}

// get hog features
std::vector<cv::Mat> hog_feat = p_fhog.extract(patch_gray, 2, p_cell_size, 9);

//get color rgb features (simple r,g,b channels)
std::vector<cv::Mat> color_feat;
if (m_use_color) {
//resize to default size
if (scale > 1.){
//if we downsample use INTER_AREA interpolation
cv::resize(patch_rgb, patch_rgb, cv::Size(size_x/p_cell_size, size_y/p_cell_size), 0., 0., cv::INTER_AREA);
}else {
cv::resize(patch_rgb, patch_rgb, cv::Size(size_x/p_cell_size, size_y/p_cell_size), 0., 0., cv::INTER_LINEAR);
}

patch_rgb.convertTo(patch_rgb, CV_32F, 1. / 255., -0.5);

if (patch_rgb.channels() == 3) {
cv::Mat b(patch_rgb.size(), CV_32FC1);
cv::Mat g(patch_rgb.size(), CV_32FC1);
cv::Mat r(patch_rgb.size(), CV_32FC1);
color_feat = {b, g, r};
cv::split(patch_rgb, color_feat);
}
}
hog_feat.insert(hog_feat.end(), color_feat.begin(), color_feat.end());

return hog_feat;
}

cv::Mat KCF_Tracker::gaussian_shaped_labels(double sigma, int dim1, int dim2)
{
cv::Mat labels(dim2, dim1, CV_32FC1);
Expand Down
21 changes: 17 additions & 4 deletions src/kcf.h
Expand Up @@ -10,17 +10,26 @@ struct BBox_c
{
double cx, cy, w, h;

inline void scale(double factor){
inline void scale(double factor)
{
cx *= factor;
cy *= factor;
w *= factor;
h *= factor;
}

inline cv::Rect get_rect()
{
return cv::Rect(cx-w/2., cy-h/2., w, h);
}

};

class KCF_Tracker
{
public:
bool m_use_scale {true};
bool m_use_color {true};

/*
padding ... extra area surrounding the target (1.5)
Expand All @@ -36,7 +45,7 @@ class KCF_Tracker
KCF_Tracker() {}

// Init/re-init methods
void init(cv::Mat & img, BBox_c & bbox);
void init(cv::Mat & img, const cv::Rect & bbox);
void setTrackerPose(BBox_c & bbox, cv::Mat & img);
void updateTrackerPosition(BBox_c & bbox);

Expand All @@ -57,6 +66,11 @@ class KCF_Tracker
int p_cell_size = 4; //4 for hog (= bin_size)
int p_windows_size[2];
cv::Mat p_cos_window;
int p_num_scales {7};
double p_scale_step = 1.01;
double p_current_scale = 1.;
double p_min_max_scale[2];
std::vector<double> p_scales;

FHoG p_fhog; //class encapsulating hog feature extraction

Expand All @@ -76,9 +90,8 @@ class KCF_Tracker
ComplexMat fft2(const cv::Mat & input);
ComplexMat fft2(const std::vector<cv::Mat> & input, const cv::Mat & cos_window);
cv::Mat ifft2(const ComplexMat & inputf);
std::vector<cv::Mat> get_features(cv::Mat & input_rgb, cv::Mat & input_gray, int cx, int cy, int size_x, int size_y, double scale = 1.);

//tests
friend void run_tests(KCF_Tracker & tracker, const std::vector<bool> & tests);
};

#endif //KCF_HEADER_6565467831231

0 comments on commit 8d7f23b

Please sign in to comment.