Skip to content

Commit

Permalink
✨ Add classifier class with Complement Naive Bayes
Browse files Browse the repository at this point in the history
  • Loading branch information
yoshoku committed Mar 20, 2020
1 parent fe4bc96 commit 2864055
Show file tree
Hide file tree
Showing 3 changed files with 158 additions and 0 deletions.
1 change: 1 addition & 0 deletions lib/rumale.rb
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
require 'rumale/nearest_neighbors/k_neighbors_regressor'
require 'rumale/naive_bayes/base_naive_bayes'
require 'rumale/naive_bayes/bernoulli_nb'
require 'rumale/naive_bayes/complement_nb'
require 'rumale/naive_bayes/gaussian_nb'
require 'rumale/naive_bayes/multinomial_nb'
require 'rumale/tree/node'
Expand Down
85 changes: 85 additions & 0 deletions lib/rumale/naive_bayes/complement_nb.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# frozen_string_literal: true

require 'rumale/naive_bayes/base_naive_bayes'

module Rumale
module NaiveBayes
# ComplementNB is a class that implements Complement Naive Bayes classifier.
#
# @example
# estimator = Rumale::NaiveBayes::ComplementNB.new(smoothing_param: 1.0)
# estimator.fit(training_samples, training_labels)
# results = estimator.predict(testing_samples)
#
# *Reference*
# - Rennie, J. D. M., Shih, L., Teevan, J., and Karger, D. R., "Tackling the Poor Assumptions of Naive Bayes Text Classifiers," ICML' 03, pp. 616--623, 2003.
class ComplementNB < BaseNaiveBayes
  # Return the class labels.
  # @return [Numo::Int32] (size: n_classes)
  attr_reader :classes

  # Return the prior probabilities of the classes.
  # @return [Numo::DFloat] (shape: [n_classes])
  attr_reader :class_priors

  # Return the conditional probabilities for features of each class.
  # These are estimated from the samples that do NOT belong to the class
  # (the complement of the class).
  # @return [Numo::DFloat] (shape: [n_classes, n_features])
  attr_reader :feature_probs

  # Create a new classifier with Complement Naive Bayes.
  #
  # @param smoothing_param [Float] The smoothing parameter added to every complement feature count.
  # @param norm [Boolean] The flag indicating whether to normalize the weight vectors.
  def initialize(smoothing_param: 1.0, norm: false)
    check_params_numeric(smoothing_param: smoothing_param)
    check_params_positive(smoothing_param: smoothing_param)
    check_params_boolean(norm: norm)
    @params = { smoothing_param: smoothing_param, norm: norm }
  end

  # Fit the model with given training data.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
  # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
  #   to be used for fitting the model.
  # @return [ComplementNB] The learned classifier itself.
  def fit(x, y)
    x = check_convert_sample_array(x)
    y = check_convert_label_array(y)
    check_sample_label_size(x, y)

    n_samples = x.shape[0]
    @classes = Numo::Int32[*y.to_a.uniq.sort]
    n_classes = @classes.size
    labels = @classes.to_a

    # Prior of each class is the fraction of training samples carrying its label.
    @class_priors = Numo::DFloat[*labels.map { |label| y.eq(label).count.fdiv(n_samples) }]
    @class_log_probs = Numo::NMath.log(@class_priors)

    # For each class, accumulate the feature totals over all samples of the other classes.
    complement_counts = Numo::DFloat[*labels.map { |label| x[y.ne(label).where, true].sum(0) }]
    smoothed_counts = complement_counts + @params[:smoothing_param]

    # Row-wise normalization turns the smoothed counts into per-class distributions.
    @feature_probs = smoothed_counts / smoothed_counts.sum(1).reshape(n_classes, 1)
    log_probs = Numo::NMath.log(@feature_probs)

    # Either scale each row of log-probabilities by its row sum, or just flip the sign.
    if normalize?
      @weights = log_probs / log_probs.sum(1).reshape(n_classes, 1)
    else
      @weights = -log_probs
    end

    self
  end

  # Calculate confidence scores for samples.
  #
  # The score is the class log prior plus the product of the samples and the learned weights.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
  def decision_function(x)
    x = check_convert_sample_array(x)
    weighted_scores = x.dot(@weights.transpose)
    @class_log_probs + weighted_scores
  end

  private

  # Whether the weight vectors should be normalized, as requested via the norm parameter.
  def normalize?
    @params[:norm] == true
  end
end
end
end
72 changes: 72 additions & 0 deletions spec/rumale/naive_bayes/complement_nb_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# frozen_string_literal: true

require 'spec_helper'

# Specs for ComplementNB on a small two-class dataset. The shared examples are
# run for both settings of the norm parameter.
RSpec.describe Rumale::NaiveBayes::ComplementNB do
  # Six samples with four count-like features; the first three rows are labeled 1, the rest -1.
  let(:x) { Numo::DFloat[[4, 3, 0, 0], [4, 0, 0, 0], [4, 0, 1, 0], [0, 0, 5, 3], [0, 0, 0, 3], [0, 1, 5, 3]] }
  let(:y) { Numo::Int32[1, 1, 1, -1, -1, -1] }
  let(:n_samples) { x.shape[0] }
  let(:n_features) { x.shape[1] }
  let(:classes) { y.to_a.uniq.sort }
  let(:n_classes) { classes.size }
  # norm is supplied by each context below.
  let(:estimator) { described_class.new(smoothing_param: 1.0, norm: norm).fit(x, y) }
  let(:probs) { estimator.predict_proba(x) }
  let(:score) { estimator.score(x, y) }
  let(:func_vals) { estimator.decision_function(x) }
  let(:predicted) { estimator.predict(x) }
  # Labels obtained by picking the most probable class for each sample.
  let(:predicted_by_probs) { Numo::Int32[*(Array.new(n_samples) { |n| classes[probs[n, true].max_index] })] }
  let(:copied) { Marshal.load(Marshal.dump(estimator)) }

  shared_examples 'classification' do
    it 'classifies two clusters data.', :aggregate_failures do
      expect(estimator.class_priors.class).to eq(Numo::DFloat)
      expect(estimator.class_priors.ndim).to eq(1)
      expect(estimator.class_priors.shape[0]).to eq(n_classes)
      expect(estimator.feature_probs.class).to eq(Numo::DFloat)
      expect(estimator.feature_probs.ndim).to eq(2)
      expect(estimator.feature_probs.shape[0]).to eq(n_classes)
      expect(estimator.feature_probs.shape[1]).to eq(n_features)
      expect(estimator.classes.class).to eq(Numo::Int32)
      expect(estimator.classes.ndim).to eq(1)
      expect(estimator.classes.shape[0]).to eq(n_classes)
      expect(func_vals.class).to eq(Numo::DFloat)
      expect(func_vals.ndim).to eq(2)
      expect(func_vals.shape[0]).to eq(n_samples)
      expect(func_vals.shape[1]).to eq(n_classes)
      expect(predicted.class).to eq(Numo::Int32)
      expect(predicted.ndim).to eq(1)
      expect(predicted.shape[0]).to eq(n_samples)
      expect(predicted).to eq(y)
      expect(score).to eq(1.0)
    end

    it 'estimates class probabilities with two clusters dataset.', :aggregate_failures do
      expect(probs.class).to eq(Numo::DFloat)
      expect(probs.ndim).to eq(2)
      expect(probs.shape[0]).to eq(n_samples)
      expect(probs.shape[1]).to eq(n_classes)
      expect(predicted_by_probs).to eq(y)
    end
  end

  context 'when classifier is defined without normalization' do
    let(:norm) { false }

    it_behaves_like 'classification'
  end

  # Previously this context reused the "without normalization" description even
  # though it sets norm to true, which made the two cases indistinguishable in the output.
  context 'when classifier is defined with normalization' do
    let(:norm) { true }

    it_behaves_like 'classification'

    it 'dumps and restores itself using Marshal module.', :aggregate_failures do
      expect(estimator.class).to eq(copied.class)
      expect(estimator.params).to eq(copied.params)
      expect(estimator.classes).to eq(copied.classes)
      expect(estimator.class_priors).to eq(copied.class_priors)
      expect(estimator.feature_probs).to eq(copied.feature_probs)
      expect(score).to eq(copied.score(x, y))
    end
  end
end

0 comments on commit 2864055

Please sign in to comment.