Skip to content

Commit

Permalink
Merge pull request #551 from KulikDM/QMCD-update
Browse files (browse the repository at this point in the history)
QMCD normalization fix
  • Loading branch information
yzhao062 committed Jun 21, 2024
2 parents efae65d + c59cabf commit 95e55ef
Showing 1 changed file with 26 additions and 26 deletions.
52 changes: 26 additions & 26 deletions pyod/models/qmcd.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -9,6 +9,7 @@
from __future__ import print_function

import numpy as np
import scipy.stats as stats
from numba import njit, prange
from sklearn.preprocessing import MinMaxScaler
from sklearn.utils import check_array
Expand All @@ -18,20 +19,21 @@


@njit(fastmath=True, parallel=True)
def _wrap_around_discrepancy(data):
def _wrap_around_discrepancy(data, check):
"""Wrap-around Quasi-Monte Carlo discrepancy method"""

n = data.shape[0]
d = data.shape[1]
p = check.shape[0]

disc = np.zeros(n)
disc = np.zeros(p)

for i in prange(n):
for i in prange(p):
dc = 0.0
for j in prange(n):
prod = 1.0
for k in prange(d):
x_kikj = abs(data[i, k] - data[j, k])
x_kikj = abs(check[i, k] - data[j, k])
prod *= 3.0 / 2.0 - x_kikj + x_kikj ** 2

dc += prod
Expand Down Expand Up @@ -94,20 +96,23 @@ def fit(self, X, y=None):
self._set_n_classes(y)

# Normalize data between 0 and 1
scaler = MinMaxScaler()
X_norm = scaler.fit_transform(X)
X_norm = (X_norm / (X_norm.max(axis=0, keepdims=True)
+ np.spacing(0)))
self._scaler = MinMaxScaler()
X_norm = self._scaler.fit_transform(X)

self._fitted_data = X_norm.copy()

# Calculate WD QMCD scores
scores = _wrap_around_discrepancy(X_norm)
scores = _wrap_around_discrepancy(X_norm, X_norm)

# Normalize scores between 0 and 1
scores = (scores - scores.min()) / (scores.max() - scores.min())
# Get criterion for inverting scores
self._is_flipped = False
skew = stats.skew(scores)
kurt = stats.kurtosis(scores)

# Invert score order if majority is beyond 0.5
if len(scores[scores > 0.5]) > 0.5 * len(scores):
scores = 1 - scores
# Invert score order based on criterion
if (skew<0) or ((skew>=0) & (kurt<0)):
scores = scores.max() + scores.min() - scores
self._is_flipped = True

self.decision_scores_ = scores

Expand Down Expand Up @@ -140,20 +145,15 @@ def decision_function(self, X):

X = check_array(X)

# Normalize data between 0 and 1
scaler = MinMaxScaler()
X_norm = scaler.fit_transform(X)
X_norm = (X_norm / (X_norm.max(axis=0, keepdims=True)
+ np.spacing(0)))
# Scale data to fitted data
X_norm = self._scaler.transform(X)

# Calculate WD QMCD scores
scores = _wrap_around_discrepancy(X_norm)
scores = _wrap_around_discrepancy(self._fitted_data, X_norm)

# Normalize scores between 0 and 1
scores = (scores - scores.min()) / (scores.max() - scores.min())
# Invert score order based on criterion
if self._is_flipped:
scores = self.decision_scores_.max() + self.decision_scores_.min() - scores

# Invert score order if majority is beyond 0.5
if len(scores[scores > 0.5]) > 0.5 * len(scores):
scores = 1 - scores

return scores
return scores

0 comments on commit 95e55ef

Please sign in to comment.