Skip to content

Commit

Permalink
✨ Add normalizer class with L1 normalization
Browse files Browse the repository at this point in the history
  • Loading branch information
yoshoku committed Jun 19, 2020
1 parent acfc24d commit e156804
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 0 deletions.
1 change: 1 addition & 0 deletions lib/rumale.rb
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@
require 'rumale/feature_extraction/hash_vectorizer'
require 'rumale/feature_extraction/feature_hasher'
require 'rumale/preprocessing/l2_normalizer'
require 'rumale/preprocessing/l1_normalizer'
require 'rumale/preprocessing/min_max_scaler'
require 'rumale/preprocessing/max_abs_scaler'
require 'rumale/preprocessing/standard_scaler'
Expand Down
62 changes: 62 additions & 0 deletions lib/rumale/preprocessing/l1_normalizer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# frozen_string_literal: true

require 'rumale/base/base_estimator'
require 'rumale/base/transformer'

module Rumale
  module Preprocessing
    # Rescale each sample (row) by its L1-norm, i.e. the sum of the absolute values of its features.
    #
    # @example
    #   normalizer = Rumale::Preprocessing::L1Normalizer.new
    #   new_samples = normalizer.fit_transform(samples)
    class L1Normalizer
      include Base::BaseEstimator
      include Base::Transformer

      # Return the vector consisting of the L1-norm of each sample.
      # It is nil until fit or fit_transform has been called.
      # @return [Numo::DFloat] (shape: [n_samples])
      attr_reader :norm_vec # :nodoc:

      # Create a new normalizer for normalizing to unit L1-norm.
      def initialize
        @norm_vec = nil
        @params = {}
      end

      # Compute and store the L1-norm of each sample.
      # Norms that are exactly zero are stored as one.
      #
      # @overload fit(x) -> L1Normalizer
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L1-norms.
      # @return [L1Normalizer]
      def fit(x, _y = nil)
        samples = check_convert_sample_array(x)
        @norm_vec = samples.abs.sum(1)
        # Swap zero norms for one so the later division leaves all-zero samples unchanged.
        zero_norms = @norm_vec.eq(0)
        @norm_vec[zero_norms] = 1
        self
      end

      # Compute the L1-norm of each sample, then divide each sample by its norm.
      #
      # @overload fit_transform(x) -> Numo::DFloat
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L1-norms.
      # @return [Numo::DFloat] The normalized samples.
      def fit_transform(x, _y = nil)
        samples = check_convert_sample_array(x)
        fit(samples)
        # Turn the norm vector into a column so that it broadcasts across the features of each row.
        column_of_norms = @norm_vec.expand_dims(1)
        samples / column_of_norms
      end

      # Compute the L1-norm of each sample, then divide each sample by its norm.
      # This simply delegates to fit_transform, so the stored norm vector is recomputed
      # from the given samples. This method exists for the Pipeline class.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L1-norms.
      # @return [Numo::DFloat] The normalized samples.
      def transform(x)
        fit_transform(x)
      end
    end
  end
end
33 changes: 33 additions & 0 deletions spec/rumale/preprocessing/l1_normalizer_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# frozen_string_literal: true

require 'spec_helper'

# Specs for Rumale::Preprocessing::L1Normalizer: samples are scaled to unit L1-norm,
# and samples whose norm is zero are passed through unchanged.
RSpec.describe Rumale::Preprocessing::L1Normalizer do
  let(:n_samples) { 10 }
  let(:n_features) { 4 }
  let(:normalizer) { described_class.new }
  let(:normalized) { normalizer.fit_transform(x) }

  context 'when norm vector does not contain zero' do
    # Values are shifted by -0.5 so that the absolute value in the norm actually matters.
    let(:x) { Numo::DFloat.new(n_samples, n_features).rand - 0.5 }

    it 'normalizes each sample with l1 norm.' do
      # Check every row on its own: an aggregate sum could hide rows whose errors cancel out.
      sample_norms = normalized.abs.sum(1)
      expect((sample_norms - 1).abs.max).to be < 1.0e-6
      # The stored norms must be the L1-norms of the input samples.
      expect((normalizer.norm_vec - x.abs.sum(1)).abs.max).to be < 1.0e-6
      expect(normalizer.norm_vec).to be_a(Numo::DFloat)
      expect(normalizer.norm_vec.ndim).to eq(1)
      expect(normalizer.norm_vec.shape[0]).to eq(n_samples)
    end
  end

  context 'when norm vector consists of zero values' do
    # The first sample is all zeros, so its L1-norm is zero.
    let(:x) do
      Numo::DFloat.new(n_samples, n_features).rand.tap do |samples|
        samples[0, true] = Numo::DFloat.zeros(n_features)
      end
    end

    it 'does not normalize vectors with zero norm ' do
      expect(normalized[0, true]).to eq(x[0, true])
      # The zero norm is recorded as one so that the division is a no-op.
      expect(normalizer.norm_vec[0]).to eq(1)
    end
  end
end

0 comments on commit e156804

Please sign in to comment.