✨ Add normalizer class with L1 normalization

yoshoku · Jun 19, 2020 · e156804 · e156804
1 parent acfc24d
commit e156804
Show file tree

Hide file tree

Showing 3 changed files with 96 additions and 0 deletions.
diff --git a/lib/rumale.rb b/lib/rumale.rb
@@ -94,6 +94,7 @@
 require 'rumale/feature_extraction/hash_vectorizer'
 require 'rumale/feature_extraction/feature_hasher'
 require 'rumale/preprocessing/l2_normalizer'
+require 'rumale/preprocessing/l1_normalizer'
 require 'rumale/preprocessing/min_max_scaler'
 require 'rumale/preprocessing/max_abs_scaler'
 require 'rumale/preprocessing/standard_scaler'

diff --git a/lib/rumale/preprocessing/l1_normalizer.rb b/lib/rumale/preprocessing/l1_normalizer.rb
@@ -0,0 +1,62 @@
+# frozen_string_literal: true
+
+require 'rumale/base/base_estimator'
+require 'rumale/base/transformer'
+
+module Rumale
+  module Preprocessing
+    # Normalize samples to unit L1-norm.
+    #
+    # @example
+    #   normalizer = Rumale::Preprocessing::L1Normalizer.new
+    #   new_samples = normalizer.fit_transform(samples)
+    class L1Normalizer
+      include Base::BaseEstimator
+      include Base::Transformer
+
+      # Return the vector consisting of the L1-norm of each sample.
+      # @return [Numo::DFloat] (shape: [n_samples])
+      attr_reader :norm_vec # :nodoc:
+
+      # Create a new normalizer for normalizing samples to unit L1-norm.
+      def initialize
+        @params = {}
+        @norm_vec = nil
+      end
+
+      # Calculate L1-norms of each sample.
+      #
+      # @overload fit(x) -> L1Normalizer
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L1-norms.
+      # @return [L1Normalizer]
+      def fit(x, _y = nil)
+        x = check_convert_sample_array(x)
+        @norm_vec = x.abs.sum(1)
+        @norm_vec[@norm_vec.eq(0)] = 1
+        self
+      end
+
+      # Calculate the L1-norm of each sample, and then normalize the samples to unit L1-norm.
+      #
+      # @overload fit_transform(x) -> Numo::DFloat
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L1-norms.
+      # @return [Numo::DFloat] The normalized samples.
+      def fit_transform(x, _y = nil)
+        x = check_convert_sample_array(x)
+        fit(x)
+        x / @norm_vec.expand_dims(1)
+      end
+
+      # Calculate the L1-norm of each sample, and then normalize the samples to unit L1-norm.
+      # This method calls the fit_transform method. This method exists for the Pipeline class.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L1-norms.
+      # @return [Numo::DFloat] The normalized samples.
+      def transform(x)
+        fit_transform(x)
+      end
+    end
+  end
+end
diff --git a/spec/rumale/preprocessing/l1_normalizer_spec.rb b/spec/rumale/preprocessing/l1_normalizer_spec.rb
@@ -0,0 +1,33 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Rumale::Preprocessing::L1Normalizer do
+  let(:n_samples) { 10 }
+  let(:n_features) { 4 }
+  let(:normalizer) { described_class.new }
+  let(:normalized) { normalizer.fit_transform(x) }
+
+  context 'when norm vector does not contain zero' do
+    let(:x) { Numo::DFloat.new(n_samples, n_features).rand - 0.5 }
+
+    it 'normalizes each sample with l1 norm.' do
+      sum_norm = normalized.abs.sum
+      expect((sum_norm - n_samples).abs).to be < 1.0e-6
+      expect(normalizer.norm_vec.class).to eq(Numo::DFloat)
+      expect(normalizer.norm_vec.ndim).to eq(1)
+      expect(normalizer.norm_vec.shape[0]).to eq(n_samples)
+    end
+  end
+
+  context 'when norm vector consists of zero values' do
+    let(:x) do
+      Numo::DFloat.new(n_samples, n_features).rand.tap { |x| x[0, true] = Numo::DFloat.zeros(n_features) }
+    end
+
+    it 'does not normalize vectors with zero norm ' do
+      expect(normalized[0, true]).to eq(x[0, true])
+      expect(normalizer.norm_vec[0]).to eq(1)
+    end
+  end
+end