# Naive Bayes 실습

# 1. Gaussian Naive Bayes

- 데이터, 모듈 불러오기

In [1]:
from sklearn import datasets
from sklearn.naive_bayes import GaussianNB
import pandas as pd

In [2]:
# Load the iris dataset and wrap its feature matrix and label vector in
# DataFrames so they can be inspected as tables.
iris = datasets.load_iris()
df_X = pd.DataFrame(data=iris.data)
df_Y = pd.DataFrame(data=iris.target)

In [3]:
# Preview the first rows of the feature table.
df_X.head()

Unnamed: 0,0,1,2,3
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [4]:
# Preview the first rows of the label table.
df_Y.head()

Unnamed: 0,0
0,0
1,0
2,0
3,0
4,0


- 모델 피팅

In [5]:
# Fit a Gaussian Naive Bayes classifier on the full iris dataset, then
# predict labels for those same samples (no train/test split is made here).
gnb = GaussianNB()
fitted = gnb.fit(X=iris.data, y=iris.target)
y_pred = fitted.predict(X=iris.data)

In [6]:
# Probability of belonging to each class, shown for samples 1, 48, 51 and 100.
selected_rows = [1, 48, 51, 100]
fitted.predict_proba(iris.data)[selected_rows]

array([[1.00000000e+000, 1.51480769e-017, 2.34820051e-025],
       [1.00000000e+000, 2.63876217e-018, 2.79566024e-025],
       [7.27347795e-102, 9.45169639e-001, 5.48303606e-002],
       [3.23245181e-254, 6.35381031e-011, 1.00000000e+000]])

In [7]:
# Predicted class labels for samples 1, 48, 51 and 100.
predicted_labels = fitted.predict(iris.data)
predicted_labels[[1, 48, 51, 100]]

array([0, 0, 1, 2])

- Confusion matrix 구하기

In [8]:
from sklearn.metrics import confusion_matrix

In [9]:
# Compare the true iris labels with the GaussianNB predictions
# (rows: true class, columns: predicted class).
confusion_matrix(y_true=iris.target, y_pred=y_pred)

array([[50,  0,  0],
       [ 0, 47,  3],
       [ 0,  3, 47]], dtype=int64)

- Prior 설정하기

In [10]:
# Refit with a hand-set prior that places 98% of the mass on class 2, then
# inspect how the confusion matrix shifts on the training data.
class2_heavy_priors = [0.01, 0.01, 0.98]
gnb2 = GaussianNB(priors=class2_heavy_priors)

fitted2 = gnb2.fit(X=iris.data, y=iris.target)
y_pred2 = fitted2.predict(X=iris.data)

confusion_matrix(y_true=iris.target, y_pred=y_pred2)

array([[50,  0,  0],
       [ 0, 33, 17],
       [ 0,  0, 50]], dtype=int64)

In [11]:
# Refit with a hand-set prior that places 98% of the mass on class 1, then
# inspect how the confusion matrix shifts on the training data.
class1_heavy_priors = [0.01, 0.98, 0.01]
gnb2 = GaussianNB(priors=class1_heavy_priors)

fitted2 = gnb2.fit(X=iris.data, y=iris.target)
y_pred2 = fitted2.predict(X=iris.data)

confusion_matrix(y_true=iris.target, y_pred=y_pred2)

array([[50,  0,  0],
       [ 0, 50,  0],
       [ 0, 14, 36]], dtype=int64)

# 2. Multinomial Naive Bayes

- 모듈 불러오기 및 데이터 생성

In [12]:
from sklearn.naive_bayes import MultinomialNB
import numpy as np

In [13]:
# Toy data: 6 samples with 100 integer features drawn from {0, ..., 4},
# and one distinct label (1 through 6) per sample.
# NOTE(review): no seed is set in this cell, so X differs on every run and
# the recorded outputs will not reproduce exactly.
X = np.random.randint(0, 5, size=(6, 100))
y = np.array(range(1, 7))

In [14]:
# Show the 100 feature values of the first sample.
X[0]

array([0, 4, 4, 4, 2, 4, 3, 3, 3, 4, 3, 4, 4, 1, 4, 3, 2, 2, 1, 2, 3, 2,
       0, 3, 4, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 1, 3, 3, 0, 2, 2, 4, 3, 1,
       2, 1, 1, 0, 3, 2, 0, 1, 0, 1, 1, 4, 2, 3, 3, 4, 0, 3, 4, 4, 0, 1,
       1, 1, 0, 0, 1, 0, 2, 3, 3, 1, 1, 4, 4, 2, 1, 4, 0, 1, 3, 0, 3, 2,
       3, 0, 4, 3, 3, 2, 0, 2, 0, 4, 1, 1])

In [15]:
# Show the label vector.
y

array([1, 2, 3, 4, 5, 6])

- Multinomial naive bayes 모델 생성

In [16]:
# Fit a multinomial Naive Bayes classifier with its default settings.
clf = MultinomialNB()
clf.fit(X=X, y=y)

MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)

In [17]:
# Class probabilities for the third sample; slicing (rather than X[2])
# keeps the input two-dimensional.
clf.predict_proba(X[2:3, :])

array([[1.21528820e-37, 1.57918415e-32, 1.00000000e+00, 9.14315695e-39,
        8.55423602e-33, 1.40396484e-33]])

In [18]:
# Predicted label for the third sample.
third_sample = X[2:3]
print(clf.predict(third_sample))

[3]


- Prior 변경해보기

In [19]:
# Refit with an explicit class prior that gives the second class (label 2)
# half of the prior mass instead of letting the prior be learned from y.
custom_class_prior = [0.1, 0.5, 0.1, 0.1, 0.1, 0.1]
clf2 = MultinomialNB(class_prior=custom_class_prior)
clf2.fit(X=X, y=y)

MultinomialNB(alpha=1.0, class_prior=[0.1, 0.5, 0.1, 0.1, 0.1, 0.1],
              fit_prior=True)

In [20]:
# Class probabilities for the third sample under the custom-prior model.
clf2.predict_proba(X[2:3, :])

array([[1.21528820e-37, 7.89592075e-32, 1.00000000e+00, 9.14315695e-39,
        8.55423602e-33, 1.40396484e-33]])

In [21]:
# Predicted label for the third sample under the custom-prior model.
# This must query clf2 (the model fitted with class_prior); the original
# cell called clf, so it only repeated the default-prior prediction.
print(clf2.predict(X[2:3]))

[3]
