% traintest.m

function traintest(varargin)
% TRAINTEST  Train and evaluate one-vs-rest SVM image classifiers with VLFeat
%   TRAINTEST('OPTION', VALUE, ...) loads (or builds) an image database,
%   optionally extracts per-image features to disk, obtains one code
%   vector per image from CODINGPIPELINE (or from a cached codes.mat),
%   trains one SVM per class with VL_SVMTRAIN and prints the per-class
%   average precision, the mAP and the mean accuracy. All console output
%   is also logged to diary.txt in the result folder.
%
%   Options (parsed with VL_ARGPARSE, defaults in brackets):
%
%   featureExtracted:: [true]
%     When false, features are extracted with EXTRACTORFN and written to
%     FEATURESPATH before coding. When true, this step is skipped.
%
%   dataset:: ['caltech101']
%     One of 'scene67', 'caltech101', 'caltech256', 'voc07', 'fmd'.
%
%   prefix:: ['bovw']
%     Experiment name; used as a sub-folder of EXPERIMENTDIR and as the
%     label in the printed summary.
%
%   datasetDir:: ['']
%     Dataset location, forwarded to the dataset setup function.
%
%   experimentDir:: ['']
%     Root folder for features, image database and results.
%
%   lite:: [true]
%     Forwarded to the dataset setup function and to CODINGPIPELINE.
%
%   seed:: [1]
%     Random seed; forwarded to the setup function for 'caltech101' only.
%
%   C:: [1]
%     SVM regularisation; lambda = 1 / (C * number of training images).
%
%   kernel:: ['linear']
%     'linear' (no map), 'hell' (signed square root) or 'chi2'
%     (VL_HOMKERMAP with 'kchi2').
%
%   extractorFn:: [@getDenseSift]
%     Function handle called on each image when FEATUREEXTRACTED is false.
%
%   encoderParams:: [{'type', 'bovw'}]
%   transformParams:: [{'numPcaDimensions', +inf, 'transform', 'none'}]
%   products:: [1]
%   partition:: ['none']
%     Forwarded to CODINGPIPELINE. The value of the 'transform' entry in
%     TRANSFORMPARAMS also names the result folder ('PCA+<transform>').
%
%   featuresPath, imdbPath, resultDir, modelPath::
%     Derived from the options above; may be overridden explicitly.
%     MODELPATH is currently unused because model saving is disabled.

% --------------------------------------------------------------------
%                                                       Initialization
% --------------------------------------------------------------------

%% make sure VLFeat is on the path
if ~exist('vl_version')
  run(fullfile(fileparts(which(mfilename)), ...
               '..', '..', 'toolbox', 'vl_setup.m')) ;
end

%% whether features have already been extracted to disk
opts.featureExtracted = true ;

%% default dataset and experiment name
opts.dataset = 'caltech101' ;
opts.prefix = 'bovw' ;

%% input / output directories
opts.datasetDir = '' ;
opts.experimentDir = '' ;

%% light-weight run for testing
opts.lite = true ;

%% randomness control
opts.seed = 1 ;

%% classification parameters
opts.C = 1 ;
opts.kernel = 'linear' ;

%% encoding parameters
opts.extractorFn = @getDenseSift ;
opts.encoderParams = {'type', 'bovw'} ;
opts.transformParams = {...
  'numPcaDimensions', +inf, ...
  'transform', 'none'} ;

%% product quantization and partition settings
opts.products = 1 ;
opts.partition = 'none' ;

%% derived paths
% Two passes are needed: the first pass registers the path fields with
% their defaults and lets vl_argparse apply user overrides (e.g. a new
% experimentDir or prefix); the second pass recomputes the paths from
% those overridden values and parses again so that any path given
% explicitly by the caller still wins.
for pass = 1:2
  opts.featuresPath = fullfile(opts.experimentDir, 'features', opts.dataset) ;
  opts.imdbPath = fullfile(opts.experimentDir, ...
                           sprintf('%s-imdb.mat', opts.dataset)) ;
  % look the transform name up by key instead of by fixed position
  transformName = lookupOption(opts.transformParams, 'transform', 'none') ;
  opts.resultDir = fullfile(opts.experimentDir, opts.prefix, ...
                            ['PCA+' transformName]) ;
  opts.modelPath = fullfile(opts.resultDir, 'model.mat') ;
  opts = vl_argparse(opts, varargin) ;
end

%% nothing to do if the results already exist
% NOTE: the save of result.mat further down is currently commented out,
% so this shortcut only triggers if result.mat was produced some other way.
resultFile = fullfile(opts.resultDir, 'result.mat') ;
if exist(resultFile, 'file')
  load(resultFile, 'ap', 'confusion') ;
  fprintf('%35s mAP = %04.1f, mean acc = %04.1f\n', opts.prefix, ...
          100*mean(ap), 100*mean(diag(confusion))) ;
  return ;
end

%% create output folders
if ~exist(opts.featuresPath, 'dir')
  vl_xmkdir(opts.featuresPath) ;
end
if ~exist(opts.resultDir, 'dir')
  vl_xmkdir(opts.resultDir) ;
end

%% log all console output to a diary file
opts.diaryPath = fullfile(opts.resultDir, 'diary.txt') ;
diary(opts.diaryPath) ;
diary on ;
% switch the diary off again even if an error aborts the run
diaryGuard = onCleanup(@() diary('off')) ; %#ok<NASGU>

disp('options:' ) ;
disp(opts) ;

% --------------------------------------------------------------------
%                                                   Get image database
% --------------------------------------------------------------------

%% reuse the cached image database if present, otherwise build and cache it
if exist(opts.imdbPath, 'file')
  imdb = load(opts.imdbPath) ;
else
  switch opts.dataset
    case 'scene67'
      imdb = setupScene67(opts.datasetDir, 'lite', opts.lite) ;
    case 'caltech101'
      imdb = setupCaltech256(opts.datasetDir, 'lite', opts.lite, ...
                             'variant', 'caltech101', 'seed', opts.seed) ;
    case 'caltech256'
      imdb = setupCaltech256(opts.datasetDir, 'lite', opts.lite) ;
    case 'voc07'
      imdb = setupVoc(opts.datasetDir, 'lite', opts.lite, 'edition', '2007') ;
    case 'fmd'
      imdb = setupFMD(opts.datasetDir, 'lite', opts.lite) ;
    otherwise
      error('Unknown dataset type.') ;
  end
  % store every field of imdb as a separate variable
  save(opts.imdbPath, '-struct', 'imdb') ;
end

% --------------------------------------------------------------------
%                                                     Extract Features
% --------------------------------------------------------------------

if ~opts.featureExtracted
  imageList = cellfun(@(x) fullfile(imdb.imageDir, x), ...
                      imdb.images.name, 'uniform', 0) ;
  numImages = numel(imageList) ;

  fprintf('extracting features:          ') ;
  for indexAll = 1 : numImages
    fprintf('\b\b\b\b\b\b\b\b\b%04d/%04d', indexAll, numImages) ;

    % NOTE(review): the feature file is keyed by the base file name only,
    % so two images with the same name in different sub-folders would map
    % to the same .mat file. Confirm against how codingPipeline locates
    % feature files before changing this.
    [~, name, ~] = fileparts(imageList{indexAll}) ;
    imagePath = fullfile(opts.featuresPath, strcat(name, '.mat')) ;

    % skip the (expensive) image read when the features are already there
    if exist(imagePath, 'file')
      continue ;
    end

    im = readImage(imageList{indexAll}) ;
    features = opts.extractorFn(im) ;
    save(imagePath, '-struct', 'features') ;
  end
  fprintf('\n') ;
end

% --------------------------------------------------------------------
%                                      Train encoder and encode images
% --------------------------------------------------------------------

codesFile = fullfile(opts.resultDir, 'codes.mat') ;
if ~exist(codesFile, 'file')
  descrs = codingPipeline(imdb, ...
                          opts.featuresPath, ...
                          opts.resultDir, ...
                          opts.transformParams{:}, ...
                          opts.encoderParams{:}, ...
                          'lite', opts.lite, ...
                          'products', opts.products, ...
                          'partition', opts.partition, ...
                          'encoderParams', opts.encoderParams) ;
else
  disp('***** loading codes *****') ;
  % load into a struct so the file cannot overwrite local variables
  cached = load(codesFile, 'codes') ;
  descrs = cached.codes ;
  clear cached ;
end

% flush the diary to disk
diary off ;
diary on ;

% --------------------------------------------------------------------
%                                            Train and evaluate models
% --------------------------------------------------------------------

%% list of class labels
% Single-label datasets carry imdb.images.class; otherwise the classes
% are taken from the per-class image id lists in imdb.classes.imageIds.
hasClassLabels = isfield(imdb.images, 'class') ;
if hasClassLabels
  classRange = unique(imdb.images.class) ;
else
  classRange = 1:numel(imdb.classes.imageIds) ;
end
numClasses = numel(classRange) ;

%% apply kernel maps
switch opts.kernel
  case 'linear'
    % no feature map
  case 'hell'
    descrs = sign(descrs) .* sqrt(abs(descrs)) ;
  case 'chi2'
    descrs = vl_homkermap(descrs, 1, 'kchi2') ;
  otherwise
    assert(false, 'MATLAB:assertion:failed', ...
           'Unknown kernel type ''%s''.', opts.kernel) ;
end

%% L2 normalize each column (the floor avoids division by zero)
descrs = bsxfun(@times, descrs, 1./max(sqrt(sum(descrs.^2)), 1e-12)) ;

%% train / test split: sets 1 and 2 are used for training, set 3 for testing
train = find(imdb.images.set <= 2) ;
test = find(imdb.images.set == 3) ;

%% SVM training parameters
lambda = 1 / (opts.C*numel(train)) ;
par = {'Solver', 'sdca', 'Verbose', ...
       'BiasMultiplier', 1, ...
       'Epsilon', 0.001, ...
       'MaxNumIterations', 100 * numel(train)} ;

%% per-class outputs
scores = cell(1, numClasses) ;   % prediction scores for all images
ap = zeros(1, numClasses) ;      % average precision (info.ap)
ap11 = zeros(1, numClasses) ;    % 11-point interpolated AP (info.ap_interp_11)
w = cell(1, numClasses) ;        % SVM weight vectors
b = cell(1, numClasses) ;        % SVM biases

%% train one one-vs-rest SVM per class and score every image
for c = 1:numClasses
  if hasClassLabels
    y = 2 * (imdb.images.class == classRange(c)) - 1 ;
  else
    y = - ones(1, numel(imdb.images.id)) ;
    [~,loc] = ismember(imdb.classes.imageIds{classRange(c)}, imdb.images.id) ;
    % images flagged as difficult get label 0 instead of +1
    y(loc) = 1 - imdb.classes.difficult{classRange(c)} ;
  end

  if all(y <= 0)
    % No positive example: nothing to train. Keep a placeholder row so
    % that row c of the concatenated score matrix still refers to class c.
    scores{c} = -inf(1, size(descrs, 2)) ;
    continue ;
  end

  [w{c},b{c}] = vl_svmtrain(descrs(:,train), y(train), lambda, par{:}) ;
  scores{c} = w{c}' * descrs + b{c} ;

  % precision-recall on the test split
  [~,~,info] = vl_pr(y(test), scores{c}(test)) ;
  ap(c) = info.ap ;
  ap11(c) = info.ap_interp_11 ;
  fprintf('class %s AP %.2f; AP 11 %.2f\n', imdb.meta.classes{classRange(c)}, ...
          ap(c) * 100, ap11(c)*100) ;
end
scores = cat(1,scores{:}) ;

% flush the diary to disk
diary off ;
diary on ;

% --------------------------------------------------------------------
%                                               output all the results
% --------------------------------------------------------------------

%% confusion matrix (only defined when each image has exactly one label,
%% so it is skipped for datasets without imdb.images.class)
if hasClassLabels
  [~,preds] = max(scores, [], 1) ;
  confusion = zeros(numClasses) ;
  for c = 1:numClasses
    sel = find(imdb.images.class == classRange(c) & imdb.images.set == 3) ;
    tmp = accumarray(preds(sel)', 1, [numClasses 1]) ;
    tmp = tmp / max(sum(tmp),1e-10) ;
    confusion(c,:) = tmp(:)' ;
  end
else
  confusion = NaN ;
end

%% save results (currently disabled)
%% classifiers
% save(opts.modelPath, 'w', 'b') ;
%% average precision
% save(fullfile(opts.resultDir,'result.mat'), 'scores', 'ap', 'ap11', 'confusion', 'classRange', 'opts') ;

%% summary strings
meanAccuracy = sprintf('mean accuracy: %f\n', mean(diag(confusion))) ;
mAP = sprintf('mAP: %.2f %%; mAP 11: %.2f', mean(ap) * 100, mean(ap11) * 100) ;

%% visualize results (currently disabled)
% if strcmp(opts.dataset, 'voc07') == 0
%% confusion matrix
	% figure(1) ; clf ;
	% imagesc(confusion) ; 
	% axis square ;
	% title([opts.prefix ' - ' meanAccuracy]) ;
	% vl_printsize(1) ;
	% print('-dpdf', fullfile(opts.resultDir, 'result-confusion.pdf')) ;
	% print('-djpeg', fullfile(opts.resultDir, 'result-confusion.jpg')) ;
% end

%% average accuracy for each class and the mAP
% figure(2) ; clf ; bar(ap * 100) ;
% title([opts.prefix ' - ' mAP]) ;
% ylabel('AP %%') ; 
% xlabel('class') ;
% grid on ;
% vl_printsize(1) ;
% ylim([0 100]) ;
% print('-dpdf', fullfile(opts.resultDir,'result-ap.pdf')) ;
% print('-djpeg', fullfile(opts.resultDir, 'result-ap.jpg')) ;

disp(meanAccuracy) ;
disp(mAP) ;

diary off ;

end

% --------------------------------------------------------------------
function value = lookupOption(params, key, default)
% LOOKUPOPTION  Value paired with KEY in a {'name', value, ...} cell list
%   Returns DEFAULT when KEY is absent. Names are compared without regard
%   to case; if KEY occurs more than once the last occurrence wins.
% --------------------------------------------------------------------
value = default ;
for k = 1:2:numel(params) - 1
  if ischar(params{k}) && strcmpi(params{k}, key)
    value = params{k + 1} ;
  end
end
end