Skip to content

Commit

Permalink
🚨 Fix Rubocop pointed out
Browse files Browse the repository at this point in the history
  • Loading branch information
yoshoku committed May 16, 2020
1 parent f40eb8a commit f049ca1
Show file tree
Hide file tree
Showing 41 changed files with 85 additions and 1 deletion.
2 changes: 2 additions & 0 deletions lib/rumale/base/base_estimator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def enable_linalg?

def enable_parallel?
return false if @params[:n_jobs].nil?

if defined?(Parallel).nil?
warn('If you want to use parallel option, you should install and load Parallel in advance.')
return false
Expand All @@ -34,6 +35,7 @@ def enable_parallel?

# Number of worker processes to use.
# Yields 1 whenever enable_parallel? is false. Otherwise a non-positive
# n_jobs selects Parallel.processor_count, and a positive n_jobs is used as is.
def n_processes
  if enable_parallel?
    requested = @params[:n_jobs]
    requested <= 0 ? Parallel.processor_count : requested
  else
    1
  end
end

Expand Down
4 changes: 4 additions & 0 deletions lib/rumale/clustering/dbscan.rb
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ def initialize(eps: 0.5, min_samples: 5, metric: 'euclidean')
# Validate the samples and run the cluster analysis on them.
# The second argument is accepted but never used. Returns the receiver.
# Raises ArgumentError when metric is 'precomputed' and x is not square.
def fit(x, _y = nil)
  x = check_convert_sample_array(x)
  if @params[:metric] == 'precomputed'
    n_rows, n_cols = x.shape
    raise ArgumentError, 'Expect the input distance matrix to be square.' unless n_rows == n_cols
  end

  partial_fit(x)
  self
end
Expand All @@ -66,6 +67,7 @@ def fit(x, _y = nil)
# Validate the samples, run the cluster analysis, and return `labels`.
# Raises ArgumentError when metric is 'precomputed' and x is not square.
def fit_predict(x)
  x = check_convert_sample_array(x)
  not_square = @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
  raise ArgumentError, 'Expect the input distance matrix to be square.' if not_square

  partial_fit(x)
  labels
end
Expand All @@ -80,6 +82,7 @@ def partial_fit(x)
@labels = Numo::Int32.zeros(n_samples) - 2
n_samples.times do |query_id|
next if @labels[query_id] >= -1

cluster_id += 1 if expand_cluster(metric_mat, query_id, cluster_id)
end
@core_sample_ids = Numo::Int32[*@core_sample_ids.flatten]
Expand All @@ -102,6 +105,7 @@ def expand_cluster(metric_mat, query_id, cluster_id)
while (m = target_ids.shift)
neighbor_ids = region_query(metric_mat[m, true])
next if neighbor_ids.size < @params[:min_samples]

neighbor_ids.each do |n|
target_ids.push(n) if @labels[n] < -1
@labels[n] = cluster_id if @labels[n] <= -1
Expand Down
2 changes: 2 additions & 0 deletions lib/rumale/clustering/gaussian_mixture.rb
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ def fit(x, _y = nil)
new_memberships = calc_memberships(x, @weights, @means, @covariances, @params[:covariance_type])
error = (memberships - new_memberships).abs.max
break if error <= @params[:tol]

memberships = new_memberships.dup
end
self
Expand Down Expand Up @@ -209,6 +210,7 @@ def calc_inv_sqrt_det_covariance(covar, covar_type)

# Guard for code paths that need Numo::Linalg.
# Does nothing unless covariance_type is 'full' and enable_linalg? is false;
# in that case it raises a RuntimeError naming the calling method.
def check_enable_linalg(method_name)
  return if @params[:covariance_type] != 'full' || enable_linalg?

  message = "GaussianMixture##{method_name} requires Numo::Linalg when covariance_type is 'full' but that is not loaded."
  raise message
end
end
Expand Down
2 changes: 2 additions & 0 deletions lib/rumale/clustering/hdbscan.rb
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ def initialize(min_samples: 10, min_cluster_size: nil, metric: 'euclidean')
# Validate the samples, hand them to fit_predict, and return the receiver.
# The second argument is accepted but never used.
# Raises ArgumentError when metric is 'precomputed' and x is not square.
def fit(x, _y = nil)
  x = check_convert_sample_array(x)
  precomputed = @params[:metric] == 'precomputed'
  raise ArgumentError, 'Expect the input distance matrix to be square.' if precomputed && x.shape[0] != x.shape[1]

  fit_predict(x)
  self
end
Expand All @@ -67,6 +68,7 @@ def fit(x, _y = nil)
# Validate the samples, obtain a distance matrix, and cluster it.
# A 'precomputed' metric uses x itself as the distance matrix (it must be
# square); any other metric computes Euclidean distances from x.
# Stores the result of partial_fit in @labels and returns it.
def fit_predict(x)
  x = check_convert_sample_array(x)
  precomputed = @params[:metric] == 'precomputed'
  raise ArgumentError, 'Expect the input distance matrix to be square.' if precomputed && x.shape[0] != x.shape[1]

  distance_mat = if precomputed
                   x
                 else
                   Rumale::PairwiseMetric.euclidean_distance(x)
                 end
  @labels = partial_fit(distance_mat)
end
Expand Down
1 change: 1 addition & 0 deletions lib/rumale/clustering/k_means.rb
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ def init_cluster_centers(x)
rand_id = [*0...n_samples].sample(@params[:n_clusters], random: sub_rng)
@cluster_centers = x[rand_id, true].dup
return unless @params[:init] == 'k-means++'

# k-means++ initialize
(1...@params[:n_clusters]).each do |n|
distance_matrix = PairwiseMetric.euclidean_distance(x, @cluster_centers[0...n, true])
Expand Down
4 changes: 4 additions & 0 deletions lib/rumale/clustering/k_medoids.rb
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ def initialize(n_clusters: 8, metric: 'euclidean', init: 'k-means++', max_iter:
def fit(x, _not_used = nil)
x = check_convert_sample_array(x)
raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]

# initialize some variables.
distance_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.euclidean_distance(x)
init_cluster_centers(distance_mat)
Expand All @@ -76,6 +77,7 @@ def fit(x, _not_used = nil)
end
new_error = distance_mat[true, @medoid_ids].mean
break if (error - new_error).abs <= @params[:tol]

error = new_error
end
@cluster_centers = x[@medoid_ids, true].dup if @params[:metric] == 'euclidean'
Expand All @@ -93,6 +95,7 @@ def predict(x)
if @params[:metric] == 'precomputed' && distance_mat.shape[1] != @medoid_ids.size
raise ArgumentError, 'Expect the size input matrix to be n_samples-by-n_clusters.'
end

assign_cluster(distance_mat)
end

Expand Down Expand Up @@ -123,6 +126,7 @@ def init_cluster_centers(distance_mat)
sub_rng = @rng.dup
@medoid_ids = Numo::Int32.asarray([*0...n_samples].sample(@params[:n_clusters], random: sub_rng))
return unless @params[:init] == 'k-means++'

# k-means++ initialize
(1...@params[:n_clusters]).each do |n|
distances = distance_mat[true, @medoid_ids[0...n]]
Expand Down
2 changes: 2 additions & 0 deletions lib/rumale/clustering/power_iteration.rb
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ def initialize(n_clusters: 8, affinity: 'rbf', gamma: nil, init: 'k-means++', ma
# Validate the samples, hand them to fit_predict, and return the receiver.
# The second argument is accepted but never used.
# Raises ArgumentError when affinity is 'precomputed' and x is not square.
def fit(x, _y = nil)
  x = check_convert_sample_array(x)
  if @params[:affinity] == 'precomputed'
    n_rows, n_cols = x.shape
    raise ArgumentError, 'Expect the input affinity matrix to be square.' if n_rows != n_cols
  end

  fit_predict(x)
  self
end
Expand Down Expand Up @@ -107,6 +108,7 @@ def embedded_space(affinity_mat, max_iter, tol)
new_embedded_line /= new_embedded_line.abs.sum
new_error = (new_embedded_line - embedded_line).abs
break if (new_error - error).abs.max <= tol

embedded_line = new_embedded_line
error = new_error
end
Expand Down
2 changes: 2 additions & 0 deletions lib/rumale/clustering/single_linkage.rb
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ def initialize(n_clusters: 2, metric: 'euclidean')
# Validate the samples, hand them to fit_predict, and return the receiver.
# The second argument is accepted but never used.
# Raises ArgumentError when metric is 'precomputed' and x is not square.
def fit(x, _y = nil)
  x = check_convert_sample_array(x)
  if @params[:metric] == 'precomputed'
    n_rows, n_cols = x.shape
    raise ArgumentError, 'Expect the input distance matrix to be square.' if n_rows != n_cols
  end

  fit_predict(x)
  self
end
Expand All @@ -66,6 +67,7 @@ def fit(x, _y = nil)
# Validate the samples, obtain a distance matrix, and cluster it.
# With a 'precomputed' metric x is used directly and must be square;
# otherwise Euclidean distances are computed from x.
# Stores the result of partial_fit in @labels and returns it.
def fit_predict(x)
  x = check_convert_sample_array(x)
  if @params[:metric] == 'precomputed'
    raise ArgumentError, 'Expect the input distance matrix to be square.' unless x.shape[0] == x.shape[1]

    distance_mat = x
  else
    distance_mat = Rumale::PairwiseMetric.euclidean_distance(x)
  end
  @labels = partial_fit(distance_mat)
end
Expand Down
2 changes: 2 additions & 0 deletions lib/rumale/dataset.rb
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ def make_circles(n_samples, shuffle: true, noise: nil, factor: 0.8, random_seed:
Rumale::Validation.check_params_numeric_or_nil(noise: noise, random_seed: random_seed)
raise ArgumentError, 'The number of samples must be more than 2.' if n_samples <= 1
raise RangeError, 'The interval of factor is (0, 1).' if factor <= 0 || factor >= 1

# initialize some variables.
rs = random_seed
rs ||= srand
Expand Down Expand Up @@ -101,6 +102,7 @@ def make_moons(n_samples, shuffle: true, noise: nil, random_seed: nil)
Rumale::Validation.check_params_boolean(shuffle: shuffle)
Rumale::Validation.check_params_numeric_or_nil(noise: noise, random_seed: random_seed)
raise ArgumentError, 'The number of samples must be more than 2.' if n_samples <= 1

# initialize some variables.
rs = random_seed
rs ||= srand
Expand Down
2 changes: 2 additions & 0 deletions lib/rumale/decomposition/factor_analysis.rb
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,11 @@ def fit(x, _y = nil)
@components = (sqrt_noise_variance.diag.dot(u) * scaler).transpose.dup
@noise_variance = Numo::DFloat.maximum(sample_vars - @components.transpose.dot(@components).diagonal, 1e-12)
next if @params[:tol].nil?

new_loglike = log_likelihood(cov_mat, @components, @noise_variance)
@loglike.push(new_loglike)
break if (old_loglike - new_loglike).abs <= @params[:tol]

old_loglike = new_loglike
end

Expand Down
1 change: 1 addition & 0 deletions lib/rumale/decomposition/pca.rb
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ def fit(x, _y = nil)
@params[:max_iter].times do
updated = orthogonalize(covariance_mat.dot(comp_vec))
break if (updated.dot(comp_vec) - 1).abs < @params[:tol]

comp_vec = updated
end
@components = @components.nil? ? comp_vec : Numo::NArray.vstack([@components, comp_vec])
Expand Down
3 changes: 3 additions & 0 deletions lib/rumale/ensemble/ada_boost_classifier.rb
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ def fit(x, y) # rubocop:disable Metrics/AbcSize
# Fit classifier.
ids = Rumale::Utils.choice_ids(n_samples, observation_weights, sub_rng)
break if y[ids].to_a.uniq.size != n_classes

tree = Tree::DecisionTreeClassifier.new(
criterion: @params[:criterion], max_depth: @params[:max_depth],
max_leaf_nodes: @params[:max_leaf_nodes], min_samples_leaf: @params[:min_samples_leaf],
Expand All @@ -120,12 +121,14 @@ def fit(x, y) # rubocop:disable Metrics/AbcSize
@estimators.push(tree)
@feature_importances += tree.feature_importances
break if error.zero?

# Update observation weights.
log_proba = Numo::NMath.log(proba)
observation_weights *= Numo::NMath.exp(-1.0 * (n_classes - 1).fdiv(n_classes) * (y_codes * log_proba).sum(1))
observation_weights = observation_weights.clip(1.0e-15, nil)
sum_observation_weights = observation_weights.sum
break if sum_observation_weights.zero?

observation_weights /= sum_observation_weights
end
@feature_importances /= @feature_importances.sum
Expand Down
3 changes: 3 additions & 0 deletions lib/rumale/ensemble/ada_boost_regressor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ def fit(x, y) # rubocop:disable Metrics/AbcSize
check_sample_tvalue_size(x, y)
# Check target values
raise ArgumentError, 'Expect target value vector to be 1-D arrray' unless y.shape.size == 1

# Initialize some variables.
n_samples, n_features = x.shape
@params[:max_features] = n_features unless @params[:max_features].is_a?(Integer)
Expand All @@ -117,6 +118,7 @@ def fit(x, y) # rubocop:disable Metrics/AbcSize
abs_err = ((p - y) / y).abs
err = observation_weights[abs_err.gt(@params[:threshold])].sum
break if err <= 0.0

# Calculate weight.
beta = err**@params[:exponent]
weight = Math.log(1.fdiv(beta))
Expand All @@ -131,6 +133,7 @@ def fit(x, y) # rubocop:disable Metrics/AbcSize
observation_weights = observation_weights.clip(1.0e-15, nil)
sum_observation_weights = observation_weights.sum
break if sum_observation_weights.zero?

observation_weights /= sum_observation_weights
end
@estimator_weights = Numo::DFloat.asarray(@estimator_weights)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,10 @@ def score(y_true, y_pred)
# calculate entropies.
class_entropy = entropy(y_true)
return 0.0 if class_entropy.zero?

cluster_entropy = entropy(y_pred)
return 0.0 if cluster_entropy.zero?

# calculate mutual information.
mi = MutualInformation.new
mi.score(y_true, y_pred) / Math.sqrt(class_entropy * cluster_entropy)
Expand Down
5 changes: 5 additions & 0 deletions lib/rumale/evaluation_measure/precision_recall.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ def precision_each_class(y_true, y_pred)
y_true.sort.to_a.uniq.map do |label|
target_positions = y_pred.eq(label)
next 0.0 if y_pred[target_positions].empty?

n_true_positives = Numo::Int32.cast(y_true[target_positions].eq(y_pred[target_positions])).sum.to_f
n_false_positives = Numo::Int32.cast(y_true[target_positions].ne(y_pred[target_positions])).sum.to_f
n_true_positives / (n_true_positives + n_false_positives)
Expand All @@ -25,6 +26,7 @@ def recall_each_class(y_true, y_pred)
y_true.sort.to_a.uniq.map do |label|
target_positions = y_true.eq(label)
next 0.0 if y_pred[target_positions].empty?

n_true_positives = Numo::Int32.cast(y_true[target_positions].eq(y_pred[target_positions])).sum.to_f
n_false_negatives = Numo::Int32.cast(y_true[target_positions].ne(y_pred[target_positions])).sum.to_f
n_true_positives / (n_true_positives + n_false_negatives)
Expand All @@ -35,6 +37,7 @@ def recall_each_class(y_true, y_pred)
# Per-class F-score built from the per-class precision and recall values.
# Each entry is 2 * precision * recall / (precision + recall); a class whose
# precision and recall are both zero scores 0.0 instead of dividing by zero.
def f_score_each_class(y_true, y_pred)
  precisions = precision_each_class(y_true, y_pred)
  recalls = recall_each_class(y_true, y_pred)
  precisions.zip(recalls).map do |precision, recall|
    if precision.zero? && recall.zero?
      0.0
    else
      (2.0 * precision * recall) / (precision + recall)
    end
  end
end
Expand All @@ -44,6 +47,7 @@ def micro_average_precision(y_true, y_pred)
evaluated_values = y_true.sort.to_a.uniq.map do |label|
target_positions = y_pred.eq(label)
next [0.0, 0.0] if y_pred[target_positions].empty?

n_true_positives = Numo::Int32.cast(y_true[target_positions].eq(y_pred[target_positions])).sum.to_f
n_false_positives = Numo::Int32.cast(y_true[target_positions].ne(y_pred[target_positions])).sum.to_f
[n_true_positives, n_true_positives + n_false_positives]
Expand All @@ -57,6 +61,7 @@ def micro_average_recall(y_true, y_pred)
evaluated_values = y_true.sort.to_a.uniq.map do |label|
target_positions = y_true.eq(label)
next 0.0 if y_pred[target_positions].empty?

n_true_positives = Numo::Int32.cast(y_true[target_positions].eq(y_pred[target_positions])).sum.to_f
n_false_negatives = Numo::Int32.cast(y_true[target_positions].ne(y_pred[target_positions])).sum.to_f
[n_true_positives, n_true_positives + n_false_negatives]
Expand Down
3 changes: 3 additions & 0 deletions lib/rumale/evaluation_measure/roc_auc.rb
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ def roc_curve(y_true, y_score, pos_label = nil)
y_score = Numo::DFloat.cast(y_score) unless y_score.is_a?(Numo::DFloat)
raise ArgumentError, 'Expect y_true to be 1-D arrray.' unless y_true.shape[1].nil?
raise ArgumentError, 'Expect y_score to be 1-D arrray.' unless y_score.shape[1].nil?

labels = y_true.to_a.uniq
if pos_label.nil?
raise ArgumentError, 'y_true must be binary labels or pos_label must be specified if y_true is multi-label' unless labels.size == 2
Expand Down Expand Up @@ -96,8 +97,10 @@ def auc(x, y)
y = Numo::NArray.asarray(y) unless y.is_a?(Numo::NArray)
raise ArgumentError, 'Expect x to be 1-D arrray.' unless x.shape[1].nil?
raise ArgumentError, 'Expect y to be 1-D arrray.' unless y.shape[1].nil?

n_samples = [x.shape[0], y.shape[0]].min
raise ArgumentError, 'At least two points are required to calculate area under curve.' if n_samples < 2

(0...n_samples).to_a.each_cons(2).map { |i, j| 0.5 * (x[i] - x[j]).abs * (y[i] + y[j]) }.reduce(&:+)
end

Expand Down
2 changes: 2 additions & 0 deletions lib/rumale/evaluation_measure/silhouette_score.rb
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ def score(x, y)
cls_pos = y.eq(labels[n])
sz_cluster = cls_pos.count
next unless sz_cluster > 1

cls_dist_mat = dist_mat[cls_pos, cls_pos].dup
cls_dist_mat[cls_dist_mat.diag_indices] = 0.0
intra_dists[cls_pos] = cls_dist_mat.sum(0) / (sz_cluster - 1)
Expand All @@ -57,6 +58,7 @@ def score(x, y)
cls_pos = y.eq(labels[m])
n_clusters.times do |n|
next if m == n

not_cls_pos = y.eq(labels[n])
inter_dists[cls_pos] = Numo::DFloat.minimum(
inter_dists[cls_pos], dist_mat[cls_pos, not_cls_pos].mean(1)
Expand Down
1 change: 1 addition & 0 deletions lib/rumale/feature_extraction/hash_vectorizer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ def fit(x, _y = nil)
f.each do |k, v|
k = "#{k}#{separator}#{v}".to_sym if v.is_a?(String)
next if @vocabulary.key?(k)

@feature_names.push(k)
@vocabulary[k] = @vocabulary.size
end
Expand Down
2 changes: 2 additions & 0 deletions lib/rumale/kernel_machine/kernel_ridge.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ class KernelRidge
def initialize(reg_param: 1.0)
raise TypeError, 'Expect class of reg_param to be Float or Numo::DFloat' unless reg_param.is_a?(Float) || reg_param.is_a?(Numo::DFloat)
raise ArgumentError, 'Expect reg_param array to be 1-D arrray' if reg_param.is_a?(Numo::DFloat) && reg_param.shape.size != 1

@params = {}
@params[:reg_param] = reg_param
@weight_vec = nil
Expand All @@ -55,6 +56,7 @@ def fit(x, y)
@weight_vec = Numo::Linalg.solve(reg_kernel_mat, y, driver: 'sym')
else
raise ArgumentError, 'Expect y and reg_param to have the same number of elements.' unless y.shape[1] == @params[:reg_param].shape[0]

n_outputs = y.shape[1]
@weight_vec = Numo::DFloat.zeros(n_samples, n_outputs)
n_outputs.times do |n|
Expand Down
1 change: 1 addition & 0 deletions lib/rumale/linear_model/base_linear_model.rb
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ def partial_fit(x, y)
# Update weight.
loss_gradient = calc_loss_gradient(sub_samples, sub_targets, weight)
next if loss_gradient.ne(0.0).count.zero?

weight = calc_new_weight(optimizer, sub_samples, weight, loss_gradient)
end
split_weight(weight)
Expand Down
1 change: 1 addition & 0 deletions lib/rumale/linear_model/linear_regression.rb
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ def fit_bias?
# Whether a sufficiently recent Numo::Linalg is loaded.
# Returns false when Numo::Linalg is not defined or its VERSION is older
# than 0.1.4, true otherwise.
def load_linalg?
  return false if defined?(Numo::Linalg).nil?

  # Compare as versions rather than strings: as plain strings,
  # '0.1.10' < '0.1.4' is true, which would wrongly reject newer releases.
  Gem::Version.new(Numo::Linalg::VERSION) >= Gem::Version.new('0.1.4')
end
end
Expand Down
1 change: 1 addition & 0 deletions lib/rumale/linear_model/ridge.rb
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ def fit_sgd(x, y)
# Whether a sufficiently recent Numo::Linalg is loaded.
# Returns false when Numo::Linalg is not defined or its VERSION is older
# than 0.1.4, true otherwise.
def load_linalg?
  return false if defined?(Numo::Linalg).nil?

  # Compare as versions rather than strings: as plain strings,
  # '0.1.10' < '0.1.4' is true, which would wrongly reject newer releases.
  Gem::Version.new(Numo::Linalg::VERSION) >= Gem::Version.new('0.1.4')
end
end
Expand Down
2 changes: 2 additions & 0 deletions lib/rumale/manifold/mds.rb
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ def initialize(n_components: 2, metric: 'euclidean', init: 'random',
def fit(x, _not_used = nil)
x = check_convert_sample_array(x)
raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]

# initialize some variables.
n_samples = x.shape[0]
hi_distance_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.euclidean_distance(x)
Expand Down Expand Up @@ -142,6 +143,7 @@ def init_embedding(x)
# Convergence test for the optimization loop.
# Never terminates when no tolerance is configured or when there is no
# previous stress value yet; otherwise reports whether the absolute change
# in stress is within the tolerance.
def terminate?(old_stress, new_stress)
  tolerance = @params[:tol]
  return false if tolerance.nil? || old_stress.nil?

  (old_stress - new_stress).abs <= tolerance
end

Expand Down

0 comments on commit f049ca1

Please sign in to comment.