In [1]:
%matplotlib notebook
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.covariance import EllipticEnvelope
from sklearn.neighbors import LocalOutlierFactor
import numpy as np

In [2]:
def localOutlierFactorAnomaly(contamination, fileName):
    """Split a three-column CSV into inliers and outliers with Local Outlier Factor.

    Parameters
    ----------
    contamination : float or str
        Forwarded unchanged to ``LocalOutlierFactor(contamination=...)``.
    fileName : str or path-like
        CSV file to load. Its first row is consumed as a header and the
        columns are renamed to ``x1``, ``x2``, ``x3``.

    Returns
    -------
    clearData : pandas.DataFrame
        Rows whose predicted label is ``1``.
    outlierData : pandas.DataFrame
        Rows whose predicted label is ``-1``.

    Both frames keep the row index of the loaded data and carry an extra
    ``Anomaly`` column holding the raw label returned by ``fit_predict``.
    """
    features = ["x1", "x2", "x3"]

    # `names` has to be passed by keyword; together with header=0 it replaces
    # whatever column names the file's first row contains.
    data = pd.read_csv(fileName, header=0, names=features)

    detector = LocalOutlierFactor(contamination=contamination)
    data["Anomaly"] = detector.fit_predict(data[features])

    # scikit-learn outlier detectors label inliers 1 and outliers -1.
    # Boolean masks select rows directly, so the split does not depend on the
    # index labels happening to equal the row positions.
    isInlier = data["Anomaly"] == 1
    isOutlier = data["Anomaly"] == -1

    # Copies, so callers can modify either result independently of `data`.
    clearData = data[isInlier].copy()
    outlierData = data[isOutlier].copy()

    return clearData, outlierData

In [3]:
def ellipticEnvelopAnomaly(contamination, fileName):
    """Split a CSV into inliers and outliers with an Elliptic Envelope.

    Parameters
    ----------
    contamination : float
        Forwarded unchanged to ``EllipticEnvelope(contamination=...)``.
    fileName : str or path-like
        CSV file to load with ``pandas.read_csv`` defaults. The loaded frame
        must expose columns named ``x1``, ``x2`` and ``x3``.

    Returns
    -------
    clearData : pandas.DataFrame
        Rows whose predicted label is ``1``.
    outlierData : pandas.DataFrame
        Rows whose predicted label is ``-1``.

    Both frames keep the row index of the loaded data and carry an extra
    ``Anomaly`` column holding the raw label returned by ``predict``.
    """
    features = ["x1", "x2", "x3"]
    data = pd.read_csv(fileName)

    # random_state is pinned so repeated runs give the same fit.
    detector = EllipticEnvelope(random_state=0, contamination=contamination)
    detector.fit(data[features])
    data["Anomaly"] = detector.predict(data[features])

    # scikit-learn outlier detectors label inliers 1 and outliers -1.
    # Boolean masks select rows directly, so the split does not depend on the
    # index labels happening to equal the row positions.
    isInlier = data["Anomaly"] == 1
    isOutlier = data["Anomaly"] == -1

    # Copies, so callers can modify either result independently of `data`.
    clearData = data[isInlier].copy()
    outlierData = data[isOutlier].copy()

    return clearData, outlierData

In [4]:
def iForestAnomaly(contamination, fileName):
    """Split a CSV into inliers and outliers with an Isolation Forest.

    Parameters
    ----------
    contamination : float or str
        Forwarded unchanged to ``IsolationForest(contamination=...)``.
    fileName : str or path-like
        CSV file to load with ``pandas.read_csv`` defaults. The loaded frame
        must expose columns named ``x1``, ``x2`` and ``x3``.

    Returns
    -------
    clearData : pandas.DataFrame
        Rows whose predicted label is ``1``.
    outlierData : pandas.DataFrame
        Rows whose predicted label is ``-1``.

    Both frames keep the row index of the loaded data and carry an extra
    ``Anomaly`` column holding the raw label returned by ``predict``.
    """
    features = ["x1", "x2", "x3"]
    data = pd.read_csv(fileName)

    # random_state is pinned so repeated runs give the same forest.
    detector = IsolationForest(random_state=0, contamination=contamination)
    detector.fit(data[features])
    data["Anomaly"] = detector.predict(data[features])

    # scikit-learn outlier detectors label inliers 1 and outliers -1.
    # Boolean masks select rows directly, so the split does not depend on the
    # index labels happening to equal the row positions.
    isInlier = data["Anomaly"] == 1
    isOutlier = data["Anomaly"] == -1

    # Copies, so callers can modify either result independently of `data`.
    clearData = data[isInlier].copy()
    outlierData = data[isOutlier].copy()

    return clearData, outlierData

In [5]:
def drawBox(clearData, outlierData):
    """Plot the axis-aligned bounding box of ``clearData`` together with the points.

    The box spans the min/max of the ``x1``, ``x2`` and ``x3`` columns of
    ``clearData`` and is drawn as six wireframe faces on a 3D axes.
    ``clearData`` is scattered in the default colour; rows of ``outlierData``
    lying outside the box are drawn black and those inside (bounds inclusive)
    are drawn red. The figure is shown and the box size and center are
    printed. Nothing is returned.

    Parameters
    ----------
    clearData : pandas.DataFrame
        Points that define the box; needs columns ``x1``, ``x2``, ``x3``.
    outlierData : pandas.DataFrame
        Points classified against the box; needs the same three columns.
    """
    axisNames = ("x1", "x2", "x3")

    # Per-axis extent of the retained points.
    lows = [clearData[name].min() for name in axisNames]
    highs = [clearData[name].max() for name in axisNames]

    size = [hi - lo for lo, hi in zip(lows, highs)]
    center = [(hi + lo) / 2 for lo, hi in zip(lows, highs)]

    # Face coordinates are derived from center and size (center -/+ half size).
    nearX, nearY, nearZ = [c - s / 2 for c, s in zip(center, size)]
    farX, farY, farZ = [c + s / 2 for c, s in zip(center, size)]

    xs = np.linspace(nearX, farX, num=10)
    ys = np.linspace(nearY, farY, num=10)
    zs = np.linspace(nearZ, farZ, num=10)

    def constant(grid, value):
        # Grid of the same shape as `grid`, filled with `value`.
        return np.ones_like(grid) * value

    xzX, xzZ = np.meshgrid(xs, zs)
    xyX, xyY = np.meshgrid(xs, ys)
    yzY, yzZ = np.meshgrid(ys, zs)

    # Six faces as (X, Y, Z) grids: the two faces of constant y, then the two
    # of constant z, then the two of constant x.
    faces = [
        (xzX, constant(xzX, nearY), xzZ),
        (xzX, constant(xzX, farY), xzZ),
        (xyX, xyY, constant(xyX, nearZ)),
        (xyX, xyY, constant(xyX, farZ)),
        (constant(yzY, nearX), yzY, yzZ),
        (constant(yzY, farX), yzY, yzZ),
    ]

    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(projection='3d')
    for faceX, faceY, faceZ in faces:
        ax.plot_wireframe(faceX, faceY, faceZ, rstride=4, cstride=4, color='#2980b9', alpha=1)
    ax.set_xlabel("X")
    ax.set_ylabel('Y')
    ax.set_zlabel('Z')

    # Inside = within the inclusive bounds on every axis;
    # outside = strictly beyond the bounds on at least one axis.
    within = [outlierData[name].between(lo, hi)
              for name, lo, hi in zip(axisNames, lows, highs)]
    beyond = [(outlierData[name] > hi) | (outlierData[name] < lo)
              for name, lo, hi in zip(axisNames, lows, highs)]

    outliersInBox = outlierData.loc[within[0] & within[1] & within[2]]
    outliersOutBox = outlierData.loc[beyond[0] | beyond[1] | beyond[2]]

    ax.scatter(*(clearData[name] for name in axisNames))
    ax.scatter(*(outliersOutBox[name] for name in axisNames), c="black")
    ax.scatter(*(outliersInBox[name] for name in axisNames), c="red")
    plt.show()

    print("Size:", size, "\n")
    print("Center:", center)

In [6]:
# Shared inputs for the detector cells: the CSV file to load and the value
# forwarded as the `contamination` argument of the scikit-learn detectors.
fileName = "dataset1.csv"
contamination = 0.001

In [7]:
# Split the dataset with the Local Outlier Factor helper, then plot the
# bounding box of the retained points together with both groups of points.
clearData, outlierData = localOutlierFactorAnomaly(contamination, fileName)
drawBox(clearData, outlierData)

<IPython.core.display.Javascript object>

Size: [1.9799999999999969, 1.9800000000000004, 1.9800000000000004] 

Center: [19.0, 15.0, 10.0]


In [8]:
# Same pipeline with the Isolation Forest helper; this rebinds clearData and
# outlierData, replacing the values produced by the previous cell.
clearData, outlierData = iForestAnomaly(contamination, fileName)
drawBox(clearData, outlierData)

<IPython.core.display.Javascript object>

Size: [1.9799999999999969, 1.9800000000000004, 1.9800000000000004] 

Center: [19.0, 15.0, 10.0]
