Skip to content

Commit

Permalink
✨ Add transformer class with Kernel Fisher Discriminant Analysis
Browse files Browse the repository at this point in the history
  • Loading branch information
yoshoku committed Apr 5, 2020
1 parent d6cb597 commit df0c6c9
Show file tree
Hide file tree
Showing 3 changed files with 201 additions and 0 deletions.
1 change: 1 addition & 0 deletions lib/rumale.rb
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
require 'rumale/linear_model/elastic_net'
require 'rumale/kernel_machine/kernel_svc'
require 'rumale/kernel_machine/kernel_pca'
require 'rumale/kernel_machine/kernel_fda'
require 'rumale/kernel_machine/kernel_ridge'
require 'rumale/polynomial_model/base_factorization_machine'
require 'rumale/polynomial_model/factorization_machine_classifier'
Expand Down
120 changes: 120 additions & 0 deletions lib/rumale/kernel_machine/kernel_fda.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
# frozen_string_literal: true

require 'rumale/base/base_estimator'
require 'rumale/base/transformer'

module Rumale
module KernelMachine
# KernelFDA is a class that implements Kernel Fisher Discriminant Analysis.
#
# @example
# require 'numo/linalg/autoloader'
#
# kernel_mat_train = Rumale::PairwiseMetric::rbf_kernel(x_train)
# kfda = Rumale::KernelMachine::KernelFDA.new
# mapped_training_samples = kfda.fit_transform(kernel_mat_train, y)
#
# kernel_mat_test = Rumale::PairwiseMetric::rbf_kernel(x_test, x_train)
# mapped_test_samples = kfda.transform(kernel_mat_test)
#
# *Reference*
# - Baudat, G. and Anouar, F., "Generalized Discriminant Analysis using a Kernel Approach," Neural Computation, vol. 12, pp. 2385--2404, 2000.
class KernelFDA
  include Base::BaseEstimator
  include Base::Transformer

  # Returns the eigenvectors for embedding.
  # @return [Numo::DFloat] (shape: [n_training_samples, n_components])
  attr_reader :alphas

  # Create a new transformer with Kernel FDA.
  #
  # @param n_components [Integer] The number of components.
  #   If nil, it is set to the number of classes minus one when fitting.
  # @param reg_param [Float] The regularization parameter.
  #   It is added to the diagonal of the within scatter matrix.
  def initialize(n_components: nil, reg_param: 1e-8)
    check_params_numeric_or_nil(n_components: n_components)
    check_params_numeric(reg_param: reg_param)
    @params = {}
    @params[:n_components] = n_components
    @params[:reg_param] = reg_param
    # Learned state; all of these stay nil until #fit is called.
    @alphas = nil
    @row_mean = nil
    @all_mean = nil
    @classes = nil
  end

  # Fit the model with given training data.
  # To execute this method, Numo::Linalg must be loaded.
  #
  # @param x [Numo::DFloat] (shape: [n_training_samples, n_training_samples])
  #   The kernel matrix of the training data to be used for fitting the model.
  # @param y [Numo::Int32] (shape: [n_training_samples]) The labels to be used for fitting the model.
  # @raise [ArgumentError] If the given kernel matrix is not square.
  # @raise [RuntimeError] If Numo::Linalg is not loaded.
  # @return [KernelFDA] The learned transformer itself.
  def fit(x, y)
    x = check_convert_sample_array(x)
    y = check_convert_label_array(y)
    check_sample_label_size(x, y)
    raise ArgumentError, 'Expect the kernel matrix of training data to be square.' unless x.shape[0] == x.shape[1]
    raise 'KernelFDA#fit requires Numo::Linalg but that is not loaded.' unless enable_linalg?

    # initialize some variables.
    n_samples = x.shape[0]
    @classes = Numo::Int32[*y.to_a.uniq.sort]
    n_classes = @classes.size
    # The number of components never exceeds the number of training samples.
    n_components = if @params[:n_components].nil?
                     [n_samples, n_classes - 1].min
                   else
                     [n_samples, @params[:n_components]].min
                   end

    # centering
    # The column means and the grand mean are kept for centering kernel matrices given to #transform.
    @row_mean = x.mean(0)
    @all_mean = @row_mean.sum.fdiv(n_samples)
    centered_kernel_mat = x - x.mean(1).expand_dims(1) - @row_mean + @all_mean

    # calculate between and within scatter matrix.
    # class_mat accumulates, for each class, the outer product of its indicator vector
    # divided by the number of samples belonging to that class.
    class_mat = Numo::DFloat.zeros(n_samples, n_samples)
    @classes.each do |label|
      idx_vec = y.eq(label)
      class_mat += Numo::DFloat.cast(idx_vec).outer(idx_vec) / idx_vec.count
    end
    between_mat = centered_kernel_mat.dot(class_mat).dot(centered_kernel_mat.transpose)
    within_mat = centered_kernel_mat.dot(centered_kernel_mat.transpose) + @params[:reg_param] * Numo::DFloat.eye(n_samples)

    # calculate projection matrix.
    # Only the eigenvectors are needed; the last n_components eigenpairs of the generalized
    # problem are requested, and the column order is reversed before storing them.
    _, eig_vecs = Numo::Linalg.eigh(
      between_mat, within_mat,
      vals_range: (n_samples - n_components)...n_samples
    )
    @alphas = eig_vecs.reverse(1).dup
    self
  end

  # Fit the model with training data, and then transform them with the learned model.
  # To execute this method, Numo::Linalg must be loaded.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_samples])
  #   The kernel matrix of the training data to be used for fitting the model and transformed.
  # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
  # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
  def fit_transform(x, y)
    x = check_convert_sample_array(x)
    y = check_convert_label_array(y)
    check_sample_label_size(x, y)
    fit(x, y).transform(x)
  end

  # Transform the given data with the learned model.
  # This method reads the state learned by #fit, so #fit (or #fit_transform) must be called beforehand.
  #
  # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
  #   The kernel matrix between testing samples and training samples to be transformed.
  # @return [Numo::DFloat] (shape: [n_testing_samples, n_components]) The transformed data.
  #   A one-dimensional array (shape: [n_testing_samples]) is returned only when
  #   the n_components parameter was explicitly given as 1.
  def transform(x)
    x = check_convert_sample_array(x)
    # Center the given kernel matrix with the statistics of the training kernel matrix.
    col_mean = x.sum(1) / @row_mean.shape[0]
    centered_kernel_mat = x - col_mean.expand_dims(1) - @row_mean + @all_mean
    transformed = centered_kernel_mat.dot(@alphas)
    @params[:n_components] == 1 ? transformed[true, 0].dup : transformed
  end
end
end
end
80 changes: 80 additions & 0 deletions spec/rumale/kernel_machine/kernel_fda_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# frozen_string_literal: true

require 'spec_helper'

# Specs for Rumale::KernelMachine::KernelFDA.
# The dataset is split into training and test subsets, the transformer is fitted on
# the training kernel matrix, and the shapes/types of the embeddings are verified.
RSpec.describe Rumale::KernelMachine::KernelFDA do
  let(:n_components) { nil }
  let(:transformer) { described_class.new(n_components: n_components) }
  let(:splitter) { Rumale::ModelSelection::ShuffleSplit.new(n_splits: 1, test_size: 0.1, train_size: 0.9, random_seed: 1) }
  let(:validation_ids) { splitter.split(x, y).first }
  let(:train_ids) { validation_ids[0] }
  let(:test_ids) { validation_ids[1] }
  let(:x_train) { x[train_ids, true].dup }
  let(:x_test) { x[test_ids, true].dup }
  let(:y_train) { y[train_ids].dup }
  let(:y_test) { y[test_ids].dup }
  let(:n_train_samples) { x_train.shape[0] }
  let(:n_test_samples) { x_test.shape[0] }

  describe 'basic examples' do
    let(:dataset) { three_clusters_dataset }
    let(:x) { dataset[0] }
    let(:y) { dataset[1] }
    # Expected dimensionality of the embedding: with n_components left as nil,
    # the transformer uses the number of classes minus one.
    let(:n_discriminants) { y.to_a.uniq.size - 1 }
    let(:kernel_mat_train) { Rumale::PairwiseMetric.linear_kernel(x_train, nil) }
    let(:kernel_mat_test) { Rumale::PairwiseMetric.linear_kernel(x_test, x_train) }
    let(:z_train) { transformer.fit_transform(kernel_mat_train, y_train) }
    # NOTE: z_test relies on the transformer having been fitted already, i.e.
    # z_train or copied must be referenced before z_test within an example.
    let(:z_test) { transformer.transform(kernel_mat_test) }
    let(:copied) { Marshal.load(Marshal.dump(transformer.fit(kernel_mat_train, y_train))) }

    it 'maps into subspace.', :aggregate_failures do
      expect(z_train.class).to eq(Numo::DFloat)
      expect(z_train.ndim).to eq(2)
      expect(z_train.shape[0]).to eq(n_train_samples)
      expect(z_train.shape[1]).to eq(n_discriminants)
      expect(z_test.class).to eq(Numo::DFloat)
      expect(z_test.ndim).to eq(2)
      expect(z_test.shape[0]).to eq(n_test_samples)
      expect(z_test.shape[1]).to eq(n_discriminants)
      expect(transformer.alphas.class).to eq(Numo::DFloat)
      expect(transformer.alphas.ndim).to eq(2)
      expect(transformer.alphas.shape[0]).to eq(n_train_samples)
      expect(transformer.alphas.shape[1]).to eq(n_discriminants)
    end

    it 'dumps and restores itself using Marshal module.', :aggregate_failures do
      # Referencing copied first fits the transformer before it is compared and used below.
      expect(transformer.class).to eq(copied.class)
      expect(transformer.params[:n_components]).to eq(copied.params[:n_components])
      expect(transformer.params[:reg_param]).to eq(copied.params[:reg_param])
      expect(transformer.alphas).to eq(copied.alphas)
      expect(transformer.instance_variable_get(:@row_mean)).to eq(copied.instance_variable_get(:@row_mean))
      expect(transformer.instance_variable_get(:@all_mean)).to eq(copied.instance_variable_get(:@all_mean))
      expect(((z_test - copied.transform(kernel_mat_test))**2).sum).to be < 1.0e-8
    end
  end

  describe 'using with nearest neighbor classifier' do
    let(:dataset) { Rumale::Dataset.make_circles(200, factor: 0.4, noise: 0.03, random_seed: 1) }
    let(:x) { dataset[0] }
    let(:y) { dataset[1] }
    # With n_components explicitly set to 1, the transformer returns one-dimensional arrays,
    # so they are expanded to column matrices before being given to the classifier.
    let(:n_components) { 1 }
    let(:kernel_mat_train) { Rumale::PairwiseMetric.rbf_kernel(x_train, nil, 1.0) }
    let(:kernel_mat_test) { Rumale::PairwiseMetric.rbf_kernel(x_test, x_train, 1.0) }
    let(:z_train) { transformer.fit_transform(kernel_mat_train, y_train) }
    # NOTE: z_test relies on z_train having been evaluated (and the transformer fitted) first.
    let(:z_test) { transformer.transform(kernel_mat_test) }
    let(:classifier) { Rumale::NearestNeighbors::KNeighborsClassifier.new(n_neighbors: 1).fit(z_train.expand_dims(1), y_train) }
    let(:train_score) { classifier.score(z_train.expand_dims(1), y_train) }
    let(:test_score) { classifier.score(z_test.expand_dims(1), y_test) }

    it 'maps to a linearly separable space', :aggregate_failures do
      expect(z_train.class).to eq(Numo::DFloat)
      expect(z_train.ndim).to eq(1)
      expect(z_train.shape[0]).to eq(n_train_samples)
      expect(z_test.class).to eq(Numo::DFloat)
      expect(z_test.ndim).to eq(1)
      expect(z_test.shape[0]).to eq(n_test_samples)
      expect(train_score).to be_within(0.01).of(1.0)
      expect(test_score).to be_within(0.01).of(1.0)
    end
  end
end

0 comments on commit df0c6c9

Please sign in to comment.