-
Notifications
You must be signed in to change notification settings - Fork 31
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
✨ Add transformer class with Neighbourhood Component Analysis
- Loading branch information
Showing
3 changed files
with
271 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
179 changes: 179 additions & 0 deletions
179
lib/rumale/metric_learning/neighbourhood_component_analysis.rb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,179 @@ | ||
# frozen_string_literal: true | ||
|
||
require 'rumale/base/base_estimator' | ||
require 'rumale/base/transformer' | ||
require 'mopti/scaled_conjugate_gradient' | ||
|
||
module Rumale
  module MetricLearning
    # NeighbourhoodComponentAnalysis is a class that implements Neighbourhood Component Analysis.
    #
    # @example
    #   transformer = Rumale::MetricLearning::NeighbourhoodComponentAnalysis.new
    #   transformer.fit(training_samples, training_labels)
    #   low_samples = transformer.transform(testing_samples)
    #
    # *Reference*
    # - Goldberger, J., Roweis, S., Hinton, G., and Salakhutdinov, R., "Neighbourhood Component Analysis," Advances in NIPS'17, pp. 513--520, 2005.
    class NeighbourhoodComponentAnalysis
      include Base::BaseEstimator
      include Base::Transformer

      # Returns the neighbourhood components.
      # When the number of components is one, the optimized vector is stored without reshaping (1-D).
      # @return [Numo::DFloat] (shape: [n_components, n_features])
      attr_reader :components

      # Return the number of iterations run for optimization
      # @return [Integer]
      attr_reader :n_iter

      # Return the random generator.
      # @return [Random]
      attr_reader :rng

      # Create a new transformer with NeighbourhoodComponentAnalysis.
      #
      # @param n_components [Integer] The number of components.
      #   If nil is given, the number of features of the training data is used.
      # @param init [String] The initialization method for components ('random' or 'pca').
      #   Any value other than 'pca' results in random initialization.
      # @param tol [Float] The tolerance of termination criterion.
      # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
      # @param random_seed [Integer] The seed value using to initialize the random generator.
      #   If nil is given, the value returned by Kernel#srand is used.
      def initialize(n_components: nil, init: 'random', tol: 1e-6, verbose: false, random_seed: nil)
        check_params_numeric_or_nil(n_components: n_components, random_seed: random_seed)
        check_params_numeric(tol: tol)
        check_params_string(init: init)
        check_params_boolean(verbose: verbose)
        @params = {}
        @params[:n_components] = n_components
        @params[:init] = init
        @params[:tol] = tol
        @params[:verbose] = verbose
        @params[:random_seed] = random_seed
        @params[:random_seed] ||= srand
        @components = nil
        @n_iter = nil
        @rng = Random.new(@params[:random_seed])
      end

      # Fit the model with given training data.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
      # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
      # @return [NeighbourhoodComponentAnalysis] The learned transformer itself.
      def fit(x, y)
        x = check_convert_sample_array(x)
        y = check_convert_label_array(y)
        check_sample_label_size(x, y)
        n_features = x.shape[1]
        # The subspace can never have more dimensions than the input space.
        n_components = if @params[:n_components].nil?
                         n_features
                       else
                         [n_features, @params[:n_components]].min
                       end
        @components, @n_iter = optimize_components(x, y, n_features, n_components)
        self
      end

      # Fit the model with training data, and then transform them with the learned model.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
      # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
      def fit_transform(x, y)
        x = check_convert_sample_array(x)
        y = check_convert_label_array(y)
        fit(x, y).transform(x)
      end

      # Transform the given data with the learned model.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
      def transform(x)
        x = check_convert_sample_array(x)
        x.dot(@components.transpose)
      end

      private

      # Build the flattened initial components handed to the optimizer.
      # Uses the components of a fitted PCA when init is 'pca', otherwise random normal values
      # drawn with a copy of the random generator (so the state of @rng is left untouched).
      def init_components(x, n_features, n_components)
        if @params[:init] == 'pca'
          pca = Rumale::Decomposition::PCA.new(n_components: n_components, solver: 'evd')
          pca.fit(x).components.flatten.dup
        else
          Rumale::Utils.rand_normal([n_features, n_components], @rng.dup).flatten.dup
        end
      end

      # Optimize the components with the scaled conjugate gradient method.
      # Returns a two-element array: the learned components and the number of iterations.
      def optimize_components(x, y, n_features, n_components)
        # the verbose report below needs the number of samples.
        n_samples = x.shape[0]
        # initialize components.
        comp_init = init_components(x, n_features, n_components)
        # initialize optimization results.
        res = {}
        res[:x] = comp_init
        res[:n_iter] = 0
        # perform optimization.
        optimizer = Mopti::ScaledConjugateGradient.new(
          fnc: method(:nca_loss), jcb: method(:nca_dloss), x_init: comp_init, args: [x, y], ftol: @params[:tol]
        )
        fold = 0.0
        dold = 0.0
        optimizer.each do |prm|
          res = prm
          # res[:fnc] is the minimized value (n_samples - objective), so the subtraction recovers the objective.
          puts "[NeighbourhoodComponentAnalysis] Loss after #{res[:n_iter]} epochs: #{n_samples - res[:fnc]}" if @params[:verbose]
          # stop when neither the function value nor the jacobian value changed by more than the tolerance.
          break if (fold - res[:fnc]).abs <= @params[:tol] && (dold - res[:jcb]).abs <= @params[:tol]
          fold = res[:fnc]
          dold = res[:jcb]
        end
        # return the results.
        n_iter = res[:n_iter]
        # a single component is kept as a 1-D vector instead of a [1, n_features] matrix.
        comps = n_components == 1 ? res[:x].dup : res[:x].reshape(n_components, n_features)
        [comps, n_iter]
      end

      # Loss function given to the optimizer; w is the flattened components.
      def nca_loss(w, x, y)
        # initialize some variables.
        n_samples, n_features = x.shape
        n_components = w.size / n_features
        # projection.
        w = w.reshape(n_components, n_features)
        z = x.dot(w.transpose)
        # calculate probability matrix.
        prob_mat = probability_matrix(z)
        # calculate loss.
        # NOTE:
        # NCA attempts to maximize its objective function.
        # For the minimization algorithm, the objective function value is subtracted from the maximum value (n_samples).
        mask_mat = y.expand_dims(1).eq(y)
        masked_prob_mat = prob_mat * mask_mat
        n_samples - masked_prob_mat.sum
      end

      # Gradient function given to the optimizer; returns a flattened array of the same size as w.
      def nca_dloss(w, x, y)
        # initialize some variables.
        n_features = x.shape[1]
        n_components = w.size / n_features
        # projection.
        w = w.reshape(n_components, n_features)
        z = x.dot(w.transpose)
        # calculate probability matrix.
        prob_mat = probability_matrix(z)
        # calculate gradient.
        mask_mat = y.expand_dims(1).eq(y)
        masked_prob_mat = prob_mat * mask_mat
        weighted_prob_mat = masked_prob_mat - prob_mat * masked_prob_mat.sum(1).expand_dims(1)
        weighted_prob_mat += weighted_prob_mat.transpose
        weighted_prob_mat[weighted_prob_mat.diag_indices] = -weighted_prob_mat.sum(0)
        gradient = 2 * z.transpose.dot(weighted_prob_mat).dot(x)
        # the sign is flipped because the optimizer minimizes (see the note in nca_loss).
        -gradient.flatten.dup
      end

      # Calculate the row-normalized matrix exp(-squared_error) of the projected samples,
      # with the diagonal set to zero so that a sample never selects itself.
      def probability_matrix(z)
        prob_mat = Numo::NMath.exp(-Rumale::PairwiseMetric.squared_error(z))
        prob_mat[prob_mat.diag_indices] = 0.0
        prob_mat /= prob_mat.sum(1).expand_dims(1)
        prob_mat
      end
    end
  end
end
91 changes: 91 additions & 0 deletions
91
spec/rumale/metric_learning/neighbourhood_component_analysis_spec.rb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
# frozen_string_literal: true | ||
|
||
require 'spec_helper' | ||
|
||
RSpec.describe Rumale::MetricLearning::NeighbourhoodComponentAnalysis do
  let(:dataset) { three_clusters_dataset }
  let(:x) do
    # An extra noisy feature (scaled by 4) is appended to the dataset.
    # With this data, samples of different classes overlap when the dimensionality is reduced with PCA.
    Numo::DFloat.hstack([dataset[0], 4 * Rumale::Utils.rand_normal([dataset[0].shape[0], 1], Random.new(1))])
  end
  let(:y) { dataset[1] }
  let(:classes) { y.to_a.uniq.sort }
  let(:n_samples) { x.shape[0] }
  let(:n_features) { x.shape[1] }
  let(:n_classes) { classes.size }
  let(:n_components) { nil }
  let(:init) { 'random' }
  let(:transformer) { described_class.new(n_components: n_components, init: init, random_seed: 1) }
  let(:z) { transformer.fit_transform(x, y) }

  context 'when n_components is not given' do
    it 'projects data into subspace', :aggregate_failures do
      expect(z).to be_a(Numo::DFloat)
      expect(z.ndim).to eq(2)
      expect(z.shape[0]).to eq(n_samples)
      expect(z.shape[1]).to eq(n_features)
      expect(transformer.components).to be_a(Numo::DFloat)
      expect(transformer.components.ndim).to eq(2)
      expect(transformer.components.shape[0]).to eq(n_features)
      expect(transformer.components.shape[1]).to eq(n_features)
      expect(transformer.n_iter).to be_a(Numeric)
    end
  end

  context 'when n_components sets to 2' do
    let(:n_components) { 2 }
    let(:splitter) { Rumale::ModelSelection::ShuffleSplit.new(n_splits: 1, test_size: 0.1, train_size: 0.9, random_seed: 1) }
    let(:validation_ids) { splitter.split(x, y).first }
    let(:train_ids) { validation_ids[0] }
    let(:test_ids) { validation_ids[1] }
    let(:x_train) { x[train_ids, true].dup }
    let(:x_test) { x[test_ids, true].dup }
    let(:y_train) { y[train_ids].dup }
    let(:y_test) { y[test_ids].dup }
    let(:z_train) { transformer.fit_transform(x_train, y_train) }
    let(:z_test) { transformer.transform(x_test) }
    let(:classifier) { Rumale::NearestNeighbors::KNeighborsClassifier.new(n_neighbors: 1) }

    # Referencing z_train here fits the transformer before z_test is evaluated in the example.
    before { classifier.fit(z_train, y_train) }

    it 'projects data into a higly discriminating subspace', :aggregate_failures do
      expect(transformer.components).to be_a(Numo::DFloat)
      expect(transformer.components.ndim).to eq(2)
      expect(transformer.components.shape[0]).to eq(n_components)
      expect(transformer.components.shape[1]).to eq(n_features)
      expect(classifier.score(z_test, y_test)).to be_within(0.05).of(1.0)
    end
  end

  context 'when subspace dimensionality is one' do
    let(:n_components) { 1 }

    it 'projects data into one-dimensional subspace.', :aggregate_failures do
      expect(z).to be_a(Numo::DFloat)
      expect(z.ndim).to eq(1)
      expect(z.shape[0]).to eq(n_samples)
      expect(transformer.components).to be_a(Numo::DFloat)
      expect(transformer.components.ndim).to eq(1)
      expect(transformer.components.shape[0]).to eq(n_features)
    end
  end

  context 'when initializing components with PCA' do
    let(:init) { 'pca' }

    before { transformer.fit_transform(x, y) }

    it 'converges more quickly with simple dataset' do
      expect(transformer.n_iter).to be < 5
    end
  end

  it 'dumps and restores itself using Marshal module.', :aggregate_failures do
    copied = Marshal.load(Marshal.dump(transformer.fit(x, y)))
    expect(copied.class).to eq(transformer.class)
    expect(copied.params).to eq(transformer.params)
    expect(copied.components).to eq(transformer.components)
    # Compare the restored object against the original; comparing it with itself can never fail.
    expect(copied.n_iter).to eq(transformer.n_iter)
    expect(copied.rng).to eq(transformer.rng)
  end
end