Skip to content

Commit

Permalink
✨ Add classifier class with Negation Naive Bayes
Browse files Browse the repository at this point in the history
  • Loading branch information
yoshoku committed Mar 20, 2020
1 parent 2864055 commit 3be7164
Show file tree
Hide file tree
Showing 3 changed files with 130 additions and 0 deletions.
1 change: 1 addition & 0 deletions lib/rumale.rb
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
require 'rumale/naive_bayes/complement_nb'
require 'rumale/naive_bayes/gaussian_nb'
require 'rumale/naive_bayes/multinomial_nb'
require 'rumale/naive_bayes/negation_nb'
require 'rumale/tree/node'
require 'rumale/tree/base_decision_tree'
require 'rumale/tree/decision_tree_classifier'
Expand Down
71 changes: 71 additions & 0 deletions lib/rumale/naive_bayes/negation_nb.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# frozen_string_literal: true

require 'rumale/naive_bayes/base_naive_bayes'

module Rumale
  module NaiveBayes
    # NegationNB is a class that implements Negation Naive Bayes classifier.
    #
    # For every class the model is estimated from the training samples that do NOT
    # carry that label (see #fit), and a sample is scored higher for a class the
    # less it resembles that complement set (see #decision_function).
    #
    # @example
    #   estimator = Rumale::NaiveBayes::NegationNB.new(smoothing_param: 1.0)
    #   estimator.fit(training_samples, training_labels)
    #   results = estimator.predict(testing_samples)
    #
    # *Reference*
    # - Komiya, K., Sato, N., Fujimoto, K., and Kotani, Y., "Negation Naive Bayes for Categorization of Product Pages on the Web," RANLP' 11, pp. 586--592, 2011.
    class NegationNB < BaseNaiveBayes
      # Return the class labels.
      # @return [Numo::Int32] (size: n_classes)
      attr_reader :classes

      # Return the prior probabilities of the classes.
      # @return [Numo::DFloat] (shape: [n_classes])
      attr_reader :class_priors

      # Return the conditional probabilities for features of each class.
      # Each row is estimated from the samples that do not belong to the corresponding class.
      # @return [Numo::DFloat] (shape: [n_classes, n_features])
      attr_reader :feature_probs

      # Create a new classifier with Negation Naive Bayes.
      #
      # @param smoothing_param [Float] The smoothing parameter.
      #   It is added to every per-class feature total before normalization and must be positive.
      def initialize(smoothing_param: 1.0)
        check_params_numeric(smoothing_param: smoothing_param)
        check_params_positive(smoothing_param: smoothing_param)
        @params = { smoothing_param: smoothing_param }
      end

      # Fit the model with given training data.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
      # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
      #   to be used for fitting the model.
      # @return [NegationNB] The learned classifier itself.
      def fit(x, y)
        x = check_convert_sample_array(x)
        y = check_convert_label_array(y)
        check_sample_label_size(x, y)
        n_samples, = x.shape
        # Sorted unique labels; row i of every fitted array refers to @classes[i].
        @classes = Numo::Int32[*y.to_a.uniq.sort]
        # Prior of a class = fraction of training samples carrying its label.
        @class_priors = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count.fdiv(n_samples) }]
        # Class term of the score: log(1 / (1 - P(c))).
        # NOTE(review): when y holds a single class its prior is 1.0 and the divisor is 0;
        # presumably this yields Infinity under float semantics -- confirm the intended behavior.
        @class_log_probs = Numo::NMath.log(1 / (1 - @class_priors))
        # Per-class feature totals over the samples whose label differs from the class.
        compl_features = Numo::DFloat[*@classes.to_a.map { |l| x[y.ne(l).where, true].sum(0) }]
        # Additive smoothing keeps every probability strictly positive so that the log below is finite.
        compl_features += @params[:smoothing_param]
        n_classes = @classes.size
        # Normalize each row so that it sums to one.
        @feature_probs = compl_features / compl_features.sum(1).reshape(n_classes, 1)
        @weights = Numo::NMath.log(@feature_probs)
        self
      end

      # Calculate confidence scores for samples.
      #
      # The score of a class is its class term minus the log-likelihood of the sample
      # under the complement feature probabilities of that class.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
      def decision_function(x)
        x = check_convert_sample_array(x)
        @class_log_probs - x.dot(@weights.transpose)
      end
    end
  end
end
58 changes: 58 additions & 0 deletions spec/rumale/naive_bayes/negation_nb_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# frozen_string_literal: true

require 'spec_helper'

RSpec.describe Rumale::NaiveBayes::NegationNB do
  # Toy dataset: the first three samples are labelled 1, the last three -1.
  let(:x) do
    Numo::DFloat[
      [4, 3, 0, 0], [4, 0, 0, 0], [4, 0, 1, 0],
      [0, 0, 5, 3], [0, 0, 0, 3], [0, 1, 5, 3]
    ]
  end
  let(:y) { Numo::Int32[1, 1, 1, -1, -1, -1] }
  let(:n_samples) { x.shape.first }
  let(:n_features) { x.shape[1] }
  let(:classes) { y.to_a.uniq.sort }
  let(:n_classes) { classes.size }

  # Classifier fitted on the toy dataset, and everything derived from it.
  let(:estimator) { described_class.new(smoothing_param: 1.0).fit(x, y) }
  let(:probs) { estimator.predict_proba(x) }
  let(:score) { estimator.score(x, y) }
  let(:func_vals) { estimator.decision_function(x) }
  let(:predicted) { estimator.predict(x) }
  # Labels obtained by picking, for each sample, the class with the largest probability.
  let(:predicted_by_probs) do
    labels = n_samples.times.map { |i| classes[probs[i, true].max_index] }
    Numo::Int32[*labels]
  end
  # Round trip through Marshal to check that the fitted state is serializable.
  let(:copied) { Marshal.load(Marshal.dump(estimator)) }

  it 'classifies two clusters data.', :aggregate_failures do
    # Fitted attributes: expected array class and shape.
    expect(estimator.class_priors).to be_an_instance_of(Numo::DFloat)
    expect(estimator.class_priors.shape).to eq([n_classes])
    expect(estimator.feature_probs).to be_an_instance_of(Numo::DFloat)
    expect(estimator.feature_probs.shape).to eq([n_classes, n_features])
    expect(estimator.classes).to be_an_instance_of(Numo::Int32)
    expect(estimator.classes.shape).to eq([n_classes])
    # Decision values: one row per sample, one column per class.
    expect(func_vals).to be_an_instance_of(Numo::DFloat)
    expect(func_vals.shape).to eq([n_samples, n_classes])
    # Predictions reproduce the training labels exactly.
    expect(predicted).to be_an_instance_of(Numo::Int32)
    expect(predicted.shape).to eq([n_samples])
    expect(predicted).to eq(y)
    expect(score).to eq(1.0)
  end

  it 'estimates class probabilities with two clusters dataset.', :aggregate_failures do
    expect(probs).to be_an_instance_of(Numo::DFloat)
    expect(probs.shape).to eq([n_samples, n_classes])
    expect(predicted_by_probs).to eq(y)
  end

  it 'dumps and restores itself using Marshal module.', :aggregate_failures do
    expect(copied).to be_an_instance_of(estimator.class)
    expect(copied.params[:smoothing_param]).to eq(estimator.params[:smoothing_param])
    expect(copied.classes).to eq(estimator.classes)
    expect(copied.class_priors).to eq(estimator.class_priors)
    expect(copied.feature_probs).to eq(estimator.feature_probs)
    expect(copied.score(x, y)).to eq(score)
  end
end

0 comments on commit 3be7164

Please sign in to comment.