# 库函数导入

In [1]:
import warnings 
warnings.filterwarnings('ignore')
import numpy as np
from sklearn import datasets
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split

# 数据导入&分析

In [2]:
X,y=datasets.load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# 模型训练

In [3]:
clf=GaussianNB(var_smoothing=1e-8)
clf.fit(X_train,y_train)

GaussianNB(var_smoothing=1e-08)

# 模型预测

In [4]:
# 评估
y_pred = clf.predict(X_test)
# acc=clf.score(X_test,y_test)
acc=np.sum(y_test==y_pred)/X_test.shape[0] # X_test.shape[0]表示有多少个测试数据
print('Test acc: %.3f' % acc )

# 预测
y_proba= clf.predict_proba(X_test) #作用于测试集
print(clf.predict(X_test))
print('预计的概率值',y_proba)

Test acc: 0.967
[2 1 0 2 0 2 0 1 1 1 1 1 1 1 1 0 1 1 0 0 2 1 0 0 2 0 0 1 1 0]
预计的概率值 [[1.63542393e-232 2.18880483e-006 9.99997811e-001]
 [1.82692801e-082 9.99998304e-001 1.69619014e-006]
 [1.00000000e+000 7.10262098e-019 3.65456024e-028]
 [1.58680734e-305 1.04649339e-006 9.99998954e-001]
 [1.00000000e+000 8.59181403e-017 4.22166197e-027]
 [6.40803143e-321 1.56452195e-010 1.00000000e+000]
 [1.00000000e+000 1.09798677e-016 5.30284123e-027]
 [1.25190778e-146 7.74051994e-001 2.25948006e-001]
 [5.34645514e-150 9.07564841e-001 9.24351594e-002]
 [5.67460363e-093 9.99882109e-001 1.17891398e-004]
 [2.38828441e-210 5.29609348e-001 4.70390652e-001]
 [8.12453035e-132 9.43762533e-001 5.62374665e-002]
 [5.25421962e-132 9.98864359e-001 1.13564136e-003]
 [1.24562927e-139 9.49838596e-001 5.01614037e-002]
 [4.08442906e-140 9.88043845e-001 1.19561547e-002]
 [1.00000000e+000 7.12848075e-019 4.10169173e-029]
 [4.19763309e-131 9.87944967e-001 1.20550326e-002]
 [4.13452764e-111 9.99942383e-001 5.76169034e-00

# 模拟离散数据集--贝叶斯分类

In [5]:
import random
import numpy as np
from sklearn.naive_bayes import CategoricalNB
from sklearn.model_selection import train_test_split

# 数据导入&分析

In [6]:
# # 模拟数据
rng=np.random.RandomState(1)# numpy.random.RandomState作用类似seed，都是一种如果预先定义好随机状态后，生成随机数的在下一次调用相同的numpy.random.RandomState的时候，得到与原来一模一样的随机。
X=rng.randint(5,size=(600,100)) # 600个100维的数据
y=np.array([1,2,3,4,5,6]*100)
data=np.c_[X,y] #把特征和标签站在一起
random.shuffle(data)
X=data[:,:-1]
y=data[:,-1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# 模型训练&预测

In [7]:
clf = CategoricalNB(alpha=1)
clf.fit(X_train, y_train)
acc = clf.score(X_test, y_test)
print("Test Acc : %.3f" % acc)

Test Acc : 0.600


In [8]:
x = rng.randint(5, size=(1, 100))
print(clf.predict_proba(x))
print(clf.predict(x))

[[0.00246731 0.0864692  0.00132822 0.80762245 0.00119057 0.10092225]]
[4]
