✨ Add transformer class with Neighbourhood Component Analysis

yoshoku · Feb 28, 2020 · d296821 · d296821
1 parent 0d8ca4e
commit d296821
Show file tree

Hide file tree

Showing 3 changed files with 271 additions and 0 deletions.
diff --git a/lib/rumale.rb b/lib/rumale.rb
@@ -79,6 +79,7 @@
 require 'rumale/manifold/tsne'
 require 'rumale/manifold/mds'
 require 'rumale/metric_learning/fisher_discriminant_analysis'
+require 'rumale/metric_learning/neighbourhood_component_analysis'
 require 'rumale/neural_network/adam'
 require 'rumale/neural_network/base_mlp'
 require 'rumale/neural_network/mlp_regressor'

diff --git a/lib/rumale/metric_learning/neighbourhood_component_analysis.rb b/lib/rumale/metric_learning/neighbourhood_component_analysis.rb
@@ -0,0 +1,179 @@
+# frozen_string_literal: true
+
+require 'rumale/base/base_estimator'
+require 'rumale/base/transformer'
+require 'mopti/scaled_conjugate_gradient'
+
+module Rumale
+  module MetricLearning
+    # NeighbourhoodComponentAnalysis is a class that implements Neighbourhood Component Analysis.
+    #
+    # @example
+    #   transformer = Rumale::MetricLearning::NeighbourhoodComponentAnalysis.new
+    #   transformer.fit(training_samples, training_labels)
+    #   low_samples = transformer.transform(testing_samples)
+    #
+    # *Reference*
+    # - Goldberger, J., Roweis, S., Hinton, G., and Salakhutdinov, R., "Neighbourhood Component Analysis," Advances in NIPS'17, pp. 513--520, 2005.
+    class NeighbourhoodComponentAnalysis
+      include Base::BaseEstimator
+      include Base::Transformer
+
+      # Returns the neighbourhood components.
+      # @return [Numo::DFloat] (shape: [n_components, n_features])
+      attr_reader :components
+
+      # Return the number of iterations run for optimization
+      # @return [Integer]
+      attr_reader :n_iter
+
+      # Return the random generator.
+      # @return [Random]
+      attr_reader :rng
+
+      # Create a new transformer with NeighbourhoodComponentAnalysis.
+      #
+      # @param n_components [Integer] The number of components.
+      # @param init [String] The initialization method for components ('random' or 'pca').
+      # @param tol [Float] The tolerance of termination criterion.
+      # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
+      # @param random_seed [Integer] The seed value used to initialize the random generator.
+      def initialize(n_components: nil, init: 'random', tol: 1e-6, verbose: false, random_seed: nil)
+        check_params_numeric_or_nil(n_components: n_components, random_seed: random_seed)
+        check_params_numeric(tol: tol)
+        check_params_string(init: init)
+        check_params_boolean(verbose: verbose)
+        @params = {}
+        @params[:n_components] = n_components
+        @params[:init] = init
+        @params[:tol] = tol
+        @params[:verbose] = verbose
+        @params[:random_seed] = random_seed
+        @params[:random_seed] ||= srand
+        @components = nil
+        @n_iter = nil
+        @rng = Random.new(@params[:random_seed])
+      end
+
+      # Fit the model with given training data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
+      # @return [NeighbourhoodComponentAnalysis] The learned transformer itself.
+      def fit(x, y)
+        x = check_convert_sample_array(x)
+        y = check_convert_label_array(y)
+        check_sample_label_size(x, y)
+        n_features = x.shape[1]
+        n_components = if @params[:n_components].nil?
+                         n_features
+                       else
+                         [n_features, @params[:n_components]].min
+                       end
+        @components, @n_iter = optimize_components(x, y, n_features, n_components)
+        self
+      end
+
+      # Fit the model with training data, and then transform them with the learned model.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
+      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
+      def fit_transform(x, y)
+        x = check_convert_sample_array(x)
+        y = check_convert_label_array(y)
+        fit(x, y).transform(x)
+      end
+
+      # Transform the given data with the learned model.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
+      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
+      def transform(x)
+        x = check_convert_sample_array(x)
+        x.dot(@components.transpose)
+      end
+
+      private
+
+      def init_components(x, n_features, n_components)
+        if @params[:init] == 'pca'
+          pca = Rumale::Decomposition::PCA.new(n_components: n_components, solver: 'evd')
+          pca.fit(x).components.flatten.dup
+        else
+          Rumale::Utils.rand_normal([n_features, n_components], @rng.dup).flatten.dup
+        end
+      end
+
+      def optimize_components(x, y, n_features, n_components)
+        # initialize components.
+        comp_init = init_components(x, n_features, n_components)
+        # initialize optimization results.
+        res = {}
+        res[:x] = comp_init
+        res[:n_iter] = 0
+        # perform optimization.
+        optimizer = Mopti::ScaledConjugateGradient.new(
+          fnc: method(:nca_loss), jcb: method(:nca_dloss), x_init: comp_init, args: [x, y], ftol: @params[:tol]
+        )
+        fold = 0.0
+        dold = 0.0
+        optimizer.each do |prm|
+          res = prm
+          puts "[NeighbourhoodComponentAnalysis] Loss after #{res[:n_iter]} epochs: #{n_samples - res[:fnc]}" if @params[:verbose]
+          break if (fold - res[:fnc]).abs <= @params[:tol] && (dold - res[:jcb]).abs <= @params[:tol]
+          fold = res[:fnc]
+          dold = res[:jcb]
+        end
+        # return the results.
+        n_iter = res[:n_iter]
+        comps = n_components == 1 ? res[:x].dup : res[:x].reshape(n_components, n_features)
+        [comps, n_iter]
+      end
+
+      def nca_loss(w, x, y)
+        # initialize some variables.
+        n_samples, n_features = x.shape
+        n_components = w.size / n_features
+        # projection.
+        w = w.reshape(n_components, n_features)
+        z = x.dot(w.transpose)
+        # calculate probability matrix.
+        prob_mat = probability_matrix(z)
+        # calculate loss.
+        # NOTE:
+        # NCA attempts to maximize its objective function.
+        # For the minization algorithm, the objective function value is subtracted from the maixmum value (n_samples).
+        mask_mat = y.expand_dims(1).eq(y)
+        masked_prob_mat = prob_mat * mask_mat
+        n_samples - masked_prob_mat.sum
+      end
+
+      def nca_dloss(w, x, y)
+        # initialize some variables.
+        n_features = x.shape[1]
+        n_components = w.size / n_features
+        # projection.
+        w = w.reshape(n_components, n_features)
+        z = x.dot(w.transpose)
+        # calculate probability matrix.
+        prob_mat = probability_matrix(z)
+        # calculate gradient.
+        mask_mat = y.expand_dims(1).eq(y)
+        masked_prob_mat = prob_mat * mask_mat
+        weighted_prob_mat = masked_prob_mat - prob_mat * masked_prob_mat.sum(1).expand_dims(1)
+        weighted_prob_mat += weighted_prob_mat.transpose
+        weighted_prob_mat[weighted_prob_mat.diag_indices] = -weighted_prob_mat.sum(0)
+        gradient = 2 * z.transpose.dot(weighted_prob_mat).dot(x)
+        -gradient.flatten.dup
+      end
+
+      def probability_matrix(z)
+        prob_mat = Numo::NMath.exp(-Rumale::PairwiseMetric.squared_error(z))
+        prob_mat[prob_mat.diag_indices] = 0.0
+        prob_mat /= prob_mat.sum(1).expand_dims(1)
+        prob_mat
+      end
+    end
+  end
+end
diff --git a/spec/rumale/metric_learning/neighbourhood_component_analysis_spec.rb b/spec/rumale/metric_learning/neighbourhood_component_analysis_spec.rb
@@ -0,0 +1,91 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Rumale::MetricLearning::NeighbourhoodComponentAnalysis do
+  # Shared fixtures. Contexts below override n_components and init as needed.
+  let(:dataset) { three_clusters_dataset }
+  let(:x) do
+    # An extra random-normal column (scaled by 4, fixed seed) is appended to the features.
+    # Per the original note, this makes samples of different classes overlap when the
+    # dimensionality is reduced with PCA.
+    Numo::DFloat.hstack([dataset[0], 4 * Rumale::Utils.rand_normal([dataset[0].shape[0], 1], Random.new(1))])
+  end
+  let(:y) { dataset[1] }
+  let(:classes) { y.to_a.uniq.sort }
+  let(:n_samples) { x.shape[0] }
+  let(:n_features) { x.shape[1] }
+  let(:n_classes) { classes.size }
+  let(:n_components) { nil }
+  let(:init) { 'random' }
+  # The transformer always uses a fixed seed.
+  let(:transformer) { described_class.new(n_components: n_components, init: init, random_seed: 1) }
+  # Evaluating z fits the transformer on (x, y) and returns the projected samples.
+  let(:z) { transformer.fit_transform(x, y) }
+
+  # With n_components left as nil, both the projected data and the components
+  # are expected to keep all n_features dimensions.
+  context 'when n_components is not given' do
+    it 'projects data into subspace', :aggregate_failures do
+      # z is evaluated first, which fits the transformer before its attributes are inspected.
+      expect(z).to be_a(Numo::DFloat)
+      expect(z.ndim).to eq(2)
+      expect(z.shape[0]).to eq(n_samples)
+      expect(z.shape[1]).to eq(n_features)
+      expect(transformer.components).to be_a(Numo::DFloat)
+      expect(transformer.components.ndim).to eq(2)
+      expect(transformer.components.shape[0]).to eq(n_features)
+      expect(transformer.components.shape[1]).to eq(n_features)
+      expect(transformer.n_iter).to be_a(Numeric)
+    end
+  end
+
+  # Learns a two-dimensional projection on a 90% training split and checks that a
+  # 1-nearest-neighbour classifier trained on the projected data scores close to 1.0
+  # on the projected held-out split.
+  context 'when n_components sets to 2' do
+    let(:n_components) { 2 }
+    let(:splitter) { Rumale::ModelSelection::ShuffleSplit.new(n_splits: 1, test_size: 0.1, train_size: 0.9, random_seed: 1) }
+    let(:validation_ids) { splitter.split(x, y).first }
+    let(:train_ids) { validation_ids[0] }
+    let(:test_ids) { validation_ids[1] }
+    let(:x_train) { x[train_ids, true].dup }
+    let(:x_test) { x[test_ids, true].dup }
+    let(:y_train) { y[train_ids].dup }
+    let(:y_test) { y[test_ids].dup }
+    let(:z_train) { transformer.fit_transform(x_train, y_train) }
+    let(:z_test) { transformer.transform(x_test) }
+    let(:classifier) { Rumale::NearestNeighbors::KNeighborsClassifier.new(n_neighbors: 1) }
+
+    # Evaluating z_train here fits the transformer, so it is ready before the example runs.
+    before { classifier.fit(z_train, y_train) }
+
+    it 'projects data into a higly discriminating subspace', :aggregate_failures do
+      expect(transformer.components).to be_a(Numo::DFloat)
+      expect(transformer.components.ndim).to eq(2)
+      expect(transformer.components.shape[0]).to eq(n_components)
+      expect(transformer.components.shape[1]).to eq(n_features)
+      expect(classifier.score(z_test, y_test)).to be_within(0.05).of(1.0)
+    end
+  end
+
+  # With a single component, both the projected data and the components are
+  # expected to be one-dimensional arrays rather than matrices.
+  context 'when subspace dimensionality is one' do
+    let(:n_components) { 1 }
+
+    it 'projects data into one-dimensional subspace.', :aggregate_failures do
+      # z is evaluated first, which fits the transformer before its components are inspected.
+      expect(z).to be_a(Numo::DFloat)
+      expect(z.ndim).to eq(1)
+      expect(z.shape[0]).to eq(n_samples)
+      expect(transformer.components).to be_a(Numo::DFloat)
+      expect(transformer.components.ndim).to eq(1)
+      expect(transformer.components.shape[0]).to eq(n_features)
+    end
+  end
+
+  # With PCA initialization the optimizer is expected to finish in fewer than 5 iterations
+  # on this dataset.
+  context 'when initializing components with PCA' do
+    let(:init) { 'pca' }
+
+    # Fit the transformer so that n_iter is populated before the example runs.
+    before { transformer.fit_transform(x, y) }
+
+    it 'converges more quickly with simple dataset' do
+      expect(transformer.n_iter).to be < 5
+    end
+  end
+
+  it 'dumps and restores itself using Marshal module.', :aggregate_failures do
+    copied = Marshal.load(Marshal.dump(transformer.fit(x, y)))
+    expect(copied.class).to eq(transformer.class)
+    expect(copied.params).to eq(transformer.params)
+    expect(copied.components).to eq(transformer.components)
+    expect(copied.n_iter).to eq(copied.n_iter)
+    expect(copied.rng).to eq(copied.rng)
+  end
+end