Skip to content

Commit

Permalink
✨ Add solver option to specify optimization algorithm of PCA
Browse files Browse the repository at this point in the history
  • Loading branch information
yoshoku committed Aug 24, 2019
1 parent 2264a61 commit f7198cb
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 39 deletions.
36 changes: 25 additions & 11 deletions lib/rumale/decomposition/pca.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@ module Decomposition
# decomposer = Rumale::Decomposition::PCA.new(n_components: 2)
# representaion = decomposer.fit_transform(samples)
#
# # If Numo::Linalg is installed, you can specify 'evd' for the solver option.
# require 'numo/linalg/autoloader'
# decomposer = Rumale::Decomposition::PCA.new(n_components: 2, solver: 'evd')
# representaion = decomposer.fit_transform(samples)
#
# *Reference*
# - A. Sharma and K K. Paliwal, "Fast principal component analysis using fixed-point algorithm," Pattern Recognition Letters, 28, pp. 1151--1155, 2007.
class PCA
Expand All @@ -23,7 +28,7 @@ class PCA
attr_reader :components

# Returns the mean vector.
# @return [Numo::DFloat] (shape: [n_features]
# @return [Numo::DFloat] (shape: [n_features])
attr_reader :mean

# Return the random generator.
Expand All @@ -33,15 +38,19 @@ class PCA
# Create a new transformer with PCA.
#
# @param n_components [Integer] The number of principal components.
# @param max_iter [Integer] The maximum number of iterations.
# @param tol [Float] The tolerance of termination criterion.
# @param solver [String] The algorithm for the optimization ('fpt' or 'evd').
# 'fpt' uses the fixed-point algorithm. 'evd' performs eigen value decomposition of the covariance matrix of samples.
# @param max_iter [Integer] The maximum number of iterations. If solver = 'evd', this parameter is ignored.
# @param tol [Float] The tolerance of termination criterion. If solver = 'evd', this parameter is ignored.
# @param random_seed [Integer] The seed value using to initialize the random generator.
def initialize(n_components: 2, max_iter: 100, tol: 1.0e-4, random_seed: nil)
def initialize(n_components: 2, solver: 'fpt', max_iter: 100, tol: 1.0e-4, random_seed: nil)
check_params_integer(n_components: n_components, max_iter: max_iter)
check_params_string(solver: solver)
check_params_float(tol: tol)
check_params_type_or_nil(Integer, random_seed: random_seed)
check_params_positive(n_components: n_components, max_iter: max_iter, tol: tol)
@params = {}
@params[:solver] = solver != 'evd' ? 'fpt' : 'evd'
@params[:n_components] = n_components
@params[:max_iter] = max_iter
@params[:tol] = tol
Expand Down Expand Up @@ -69,14 +78,19 @@ def fit(x, _y = nil)
centered_x = x - @mean
# optimization.
covariance_mat = centered_x.transpose.dot(centered_x) / (n_samples - 1)
@params[:n_components].times do
comp_vec = Rumale::Utils.rand_uniform(n_features, sub_rng)
@params[:max_iter].times do
updated = orthogonalize(covariance_mat.dot(comp_vec))
break if (updated.dot(comp_vec) - 1).abs < @params[:tol]
comp_vec = updated
if @params[:solver] == 'evd' && enable_linalg?
_, evecs = Numo::Linalg.eigh(covariance_mat, vals_range: (n_features - @params[:n_components])...n_features)
@components = evecs.reverse(1).transpose.dup
else
@params[:n_components].times do
comp_vec = Rumale::Utils.rand_uniform(n_features, sub_rng)
@params[:max_iter].times do
updated = orthogonalize(covariance_mat.dot(comp_vec))
break if (updated.dot(comp_vec) - 1).abs < @params[:tol]
comp_vec = updated
end
@components = @components.nil? ? comp_vec : Numo::NArray.vstack([@components, comp_vec])
end
@components = @components.nil? ? comp_vec : Numo::NArray.vstack([@components, comp_vec])
end
self
end
Expand Down
89 changes: 61 additions & 28 deletions spec/decomposition/pca_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,38 +5,70 @@
RSpec.describe Rumale::Decomposition::PCA do
let(:x) { Marshal.load(File.read(__dir__ + '/../test_samples.dat')) }
let(:n_components) { 16 }
let(:decomposer) { described_class.new(n_components: n_components, tol: 1.0e-8, random_seed: 1) }
let(:solver) { 'fpt' }
let(:decomposer) { described_class.new(n_components: n_components, solver: solver, tol: 1.0e-8, random_seed: 1) }
let(:transformer) { Rumale::KernelApproximation::RBF.new(gamma: 1.0, n_components: 32, random_seed: 1) }

it 'projects high-dimensinal data into subspace.' do
samples = transformer.fit_transform(x)
sub_samples = decomposer.fit_transform(samples)

n_samples, n_features = samples.shape
expect(sub_samples.class).to eq(Numo::DFloat)
expect(sub_samples.shape[0]).to eq(n_samples)
expect(sub_samples.shape[1]).to eq(n_components)

expect(decomposer.components.class).to eq(Numo::DFloat)
expect(decomposer.components.shape[0]).to eq(n_components)
expect(decomposer.components.shape[1]).to eq(n_features)
expect(decomposer.mean.class).to eq(Numo::DFloat)
expect(decomposer.mean.shape[0]).to eq(n_features)
expect(decomposer.mean.shape[1]).to be_nil

rec_samples = decomposer.inverse_transform(sub_samples)
mse = Numo::NMath.sqrt(((samples - rec_samples)**2).sum(1)).mean
expect(mse).to be <= 0.1
context 'when solver is fix point algorithm' do
it 'projects high-dimensinal data into subspace.' do
samples = transformer.fit_transform(x)
sub_samples = decomposer.fit_transform(samples)
n_samples, n_features = samples.shape
expect(sub_samples.class).to eq(Numo::DFloat)
expect(sub_samples.shape[0]).to eq(n_samples)
expect(sub_samples.shape[1]).to eq(n_components)
expect(decomposer.components.class).to eq(Numo::DFloat)
expect(decomposer.components.shape[0]).to eq(n_components)
expect(decomposer.components.shape[1]).to eq(n_features)
expect(decomposer.mean.class).to eq(Numo::DFloat)
expect(decomposer.mean.shape[0]).to eq(n_features)
expect(decomposer.mean.shape[1]).to be_nil
rec_samples = decomposer.inverse_transform(sub_samples)
mse = Numo::NMath.sqrt(((samples - rec_samples)**2).sum(1)).mean
expect(mse).to be <= 0.1
end

it 'projects data into one-dimensional subspace.' do
liner = described_class.new(n_components: 1, random_seed: 1)
sub_x = liner.fit_transform(x)
rec_x = liner.inverse_transform(sub_x.expand_dims(1))
expect(rec_x.shape[0]).to eq(x.shape[0])
expect(rec_x.shape[1]).to eq(x.shape[1])
mse = Numo::NMath.sqrt(((x - rec_x)**2).sum(1)).mean
expect(mse).to be <= 0.5
end
end

it 'projects data into one-dimensional subspace.' do
liner = described_class.new(n_components: 1, random_seed: 1)
sub_x = liner.fit_transform(x)
rec_x = liner.inverse_transform(sub_x.expand_dims(1))
expect(rec_x.shape[0]).to eq(x.shape[0])
expect(rec_x.shape[1]).to eq(x.shape[1])
mse = Numo::NMath.sqrt(((x - rec_x)**2).sum(1)).mean
expect(mse).to be <= 0.5
context 'when solver is eigen value decomposition' do
let(:solver) { 'evd' }

it 'projects high-dimensinal data into subspace.' do
samples = transformer.fit_transform(x)
sub_samples = decomposer.fit_transform(samples)
n_samples, n_features = samples.shape
expect(sub_samples.class).to eq(Numo::DFloat)
expect(sub_samples.shape[0]).to eq(n_samples)
expect(sub_samples.shape[1]).to eq(n_components)
expect(decomposer.components.class).to eq(Numo::DFloat)
expect(decomposer.components.shape[0]).to eq(n_components)
expect(decomposer.components.shape[1]).to eq(n_features)
expect(decomposer.mean.class).to eq(Numo::DFloat)
expect(decomposer.mean.shape[0]).to eq(n_features)
expect(decomposer.mean.shape[1]).to be_nil
rec_samples = decomposer.inverse_transform(sub_samples)
mse = Numo::NMath.sqrt(((samples - rec_samples)**2).sum(1)).mean
expect(mse).to be <= 0.1
end

it 'projects data into one-dimensional subspace.' do
liner = described_class.new(n_components: 1, random_seed: 1)
sub_x = liner.fit_transform(x)
rec_x = liner.inverse_transform(sub_x.expand_dims(1))
expect(rec_x.shape[0]).to eq(x.shape[0])
expect(rec_x.shape[1]).to eq(x.shape[1])
mse = Numo::NMath.sqrt(((x - rec_x)**2).sum(1)).mean
expect(mse).to be <= 0.5
end
end

it 'dumps and restores itself using Marshal module.' do
Expand All @@ -45,6 +77,7 @@
copied = Marshal.load(Marshal.dump(decomposer))
expect(decomposer.class).to eq(copied.class)
expect(decomposer.params[:n_components]).to eq(copied.params[:n_components])
expect(decomposer.params[:solver]).to eq(copied.params[:solver])
expect(decomposer.params[:max_iter]).to eq(copied.params[:max_iter])
expect(decomposer.params[:tol]).to eq(copied.params[:tol])
expect(decomposer.params[:random_seed]).to eq(copied.params[:random_seed])
Expand Down

0 comments on commit f7198cb

Please sign in to comment.