# wepe/MachineLearning

Switch branches/tags
Nothing to show
Latest commit 87903b3 Sep 9, 2015
 .. Failed to load latest commit information. NaiveBayes.py Sep 9, 2015 README.md Sep 9, 2015 test.py Sep 9, 2015

### github 不支持 LaTeX，公式显示不了，请查看博文

# 1. 朴素贝叶斯的理论基础

## 1.1 贝叶斯定理

P(A|B)表示事件B已经发生的前提下，事件A发生的概率，叫做事件B发生下事件A的条件概率。其基本求解公式为：$P(A|B)=\frac{P(AB)}{P(B)}$

$P(B|A)=\frac{P(A|B)P(B)}{P(A)}$

$P(A)=\sum_{i=1}^{n}P(B_{i})P(A|B_{i})$

## 1.2 特征条件独立假设

$P(y_{k}|x)=\frac{P(x|y_{k})P(y_{k})}{P(x)}$

$P(y_{k}|x)=\frac{P(x|y_{k})P(y_{k})}{\sum_{k}P(x|y_{k})P(y_{k})}$ 【公式1】

$P(x|y_{k})=P(x_{1},x_{2},...,x_{n}|y_{k})=\prod_{i=1}^{n}P(x_{i}|y_{k})$ 【公式2】

$P(y_{k}|x)=\frac{P(y_{k})\prod_{i=1}^{n}P(x_{i}|y_{k})}{\sum_{k}P(y_{k})\prod_{i=1}^{n}P(x_{i}|y_{k})}$

$f(x)=argmax_{y_{k}} P(y_{k}|x)=argmax_{y_{k}} \frac{P(y_{k})\prod_{i=1}^{n}P(x_{i}|y_{k})}{\sum_{k}P(y_{k})\prod_{i=1}^{n}P(x_{i}|y_{k})}$

$f(x)=argmax_{y_{k}} P(y_{k})\prod_{i=1}^{n}P(x_{i}|y_{k})$

# 2. 三种常见的模型及编程实现

## 2.1 多项式模型

$P(y_{k})=\frac{N_{y_{k}}+\alpha}{N+k\alpha}$

N是总的样本个数，k是总的类别个数，$N_{y_{k}}$是类别为$y_{k}$的样本个数，$\alpha$是平滑值。

$P(x_{i}|y_{k})=\frac{N_{y_{k},x_{i}}+\alpha}{N_{y_{k}}+n\alpha}$

$N_{y_{k}}$是类别为$y_{k}$的样本个数，n是特征的维数，$N_{y_{k},x_{i}}$是类别为$y_{k}$的样本中，第i维特征的值是$x_{i}$的样本个数，$\alpha$是平滑值。

### 2.1.1 举例

• 计算先验概率

• 计算各种条件概率

• 对于给定的$x=(2,S)^{T}$，计算：

### 2.1.2 编程实现（基于 Python，NumPy）

"""
Created on 2015/09/06

@author: wepon (http://2hwp.com)

API Reference: http://scikit-learn.org/stable/modules/naive_bayes.html#naive-bayes
"""
import numpy as np

class MultinomialNB(object):
    """
    Naive Bayes classifier for multinomial models.

    The multinomial Naive Bayes classifier is suitable for classification
    with discrete features.

    Parameters
    ----------
    alpha : float, optional (default=1.0)
        Additive smoothing parameter:
        alpha = 0      -> no smoothing
        0 < alpha < 1  -> Lidstone smoothing
        alpha = 1      -> Laplace smoothing
    fit_prior : bool, optional (default=True)
        Whether to learn class prior probabilities from the data.
        If False, a uniform prior is used.
    class_prior : array-like, shape (n_classes,), optional
        Prior probabilities of the classes.  If specified, the priors
        are not adjusted according to the data.

    Methods
    -------
    fit(X, y):
        X and y are array-like; train the classifier.
    predict(X):
        Return the predicted label (1-D input) or list of labels (2-D input).
    """

    def __init__(self, alpha=1.0, fit_prior=True, class_prior=None):
        self.alpha = alpha
        self.fit_prior = fit_prior
        self.class_prior = class_prior
        self.classes = None            # unique class labels, set by fit()
        self.conditional_prob = None   # P(xj | y=ck) tables, set by fit()

    def _calculate_feature_prob(self, feature):
        """Return {value: smoothed probability} for one feature column."""
        values = np.unique(feature)
        total_num = float(len(feature))
        value_prob = {}
        for v in values:
            value_prob[v] = ((np.sum(np.equal(feature, v)) + self.alpha)
                             / (total_num + len(values) * self.alpha))
        return value_prob

    def fit(self, X, y):
        """Train on features X (n_samples, n_features) and labels y; return self."""
        # TODO: validate X, y

        self.classes = np.unique(y)

        # Class prior probabilities P(y=ck).
        # BUGFIX: use `is None` — `== None` performs an elementwise comparison
        # (and raises on truth-testing) when class_prior is a numpy array.
        if self.class_prior is None:
            class_num = len(self.classes)
            if not self.fit_prior:
                # uniform prior
                self.class_prior = [1.0 / class_num for _ in range(class_num)]
            else:
                self.class_prior = []
                sample_num = float(len(y))
                for c in self.classes:
                    c_num = np.sum(np.equal(y, c))
                    self.class_prior.append(
                        (c_num + self.alpha) / (sample_num + class_num * self.alpha))

        # Conditional probabilities P(xj | y=ck), stored as
        # { class: { feature_index: { feature_value: prob } } }
        self.conditional_prob = {}
        for c in self.classes:
            self.conditional_prob[c] = {}
            for i in range(len(X[0])):  # for each feature
                feature = X[np.equal(y, c)][:, i]
                self.conditional_prob[c][i] = self._calculate_feature_prob(feature)
        return self

    def _get_xj_prob(self, values_prob, target_value):
        """Look up the probability of target_value in a {value: prob} table.

        NOTE: a feature value never seen during fit() raises KeyError.
        """
        return values_prob[target_value]

    def _predict_single_sample(self, x):
        """Return the label with the highest posterior probability for sample x."""
        label = -1
        max_posterior_prob = 0

        # For each class, posterior ∝ class_prior * product of conditionals.
        for c_index in range(len(self.classes)):
            current_class_prior = self.class_prior[c_index]
            current_conditional_prob = 1.0
            feature_prob = self.conditional_prob[self.classes[c_index]]
            # BUGFIX: index features explicitly by position.  The original
            # paired x[j] with dict-key iteration order plus a manual counter,
            # which matches x to the wrong feature wherever dict order is
            # not insertion order (e.g. Python 2).  Keys are feature indices.
            for j in range(len(x)):
                current_conditional_prob *= self._get_xj_prob(feature_prob[j], x[j])

            posterior = current_class_prior * current_conditional_prob
            if posterior > max_posterior_prob:
                max_posterior_prob = posterior
                label = self.classes[c_index]
        return label

    def predict(self, X):
        """Predict labels: 1-D input returns one label, 2-D input a list."""
        # TODO1: check fitted state and raise NotFittedError
        # TODO2: validate X
        if X.ndim == 1:
            return self._predict_single_sample(X)
        else:
            # classify each sample
            labels = []
            for i in range(X.shape[0]):
                labels.append(self._predict_single_sample(X[i]))
            return labels



# Demo: Example 4.1 from Li Hang's "Statistical Learning Methods".
import numpy as np

X = np.array([
    [1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3],
    [4, 5, 5, 4, 4, 4, 5, 5, 6, 6, 6, 5, 5, 6, 6],
])
X = X.T  # shape (15, 2): 15 samples, 2 discrete features
y = np.array([-1, -1, 1, 1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, -1])

nb = MultinomialNB(alpha=1.0, fit_prior=True)
nb.fit(X, y)
# BUGFIX: Python-2 `print` statement replaced with the print() function.
print(nb.predict(np.array([2, 4])))  # expected output: -1



## 2.2 高斯模型

当特征是连续变量的时候，运用多项式模型就会导致很多 $P(x_{i}|y_{k})=0$（不做平滑的情况下），此时即使做平滑，所得到的条件概率也难以描述真实情况。所以处理连续的特征变量，应该采用高斯模型。

### 2.2.1 通过一个例子来说明：性别分类的例子（来自维基百科）

P(身高|性别) x P(体重|性别) x P(脚掌|性别) x P(性别)


$P(身高=6|男) \times P(体重=130|男) \times P(脚掌=8|男) \times P(男) = 6.1984 \times 10^{-9}$

$P(身高=6|女) \times P(体重=130|女) \times P(脚掌=8|女) \times P(女) = 5.3778 \times 10^{-4}$


• 总结

$P(x_{i}|y_{k})=\frac{1}{\sqrt{2\pi\sigma_{y_{k},i}^{2}}}e^{-\frac{(x_{i}-\mu_{y_{k},i})^{2}}{2 \sigma_{y_{k},i}^{2}}}$

$\mu_{y_{k},i}$表示类别为$y_{k}$的样本中，第i维特征的均值。 $\sigma_{y_{k},i}^{2}$表示类别为$y_{k}$的样本中，第i维特征的方差。

### 2.2.2 编程实现

#GaussianNB differ from MultinomialNB in these two method:
# _calculate_feature_prob, _get_xj_prob
# GaussianNB differs from MultinomialNB only in how per-feature statistics
# are stored (_calculate_feature_prob) and evaluated (_get_xj_prob).
class GaussianNB(MultinomialNB):
    """
    Gaussian Naive Bayes for continuous features.

    Inherits fit()/predict() from MultinomialNB, so self.alpha is still
    used when computing the class priors.  A textbook Gaussian NB would
    omit the smoothing term there, but the difference is negligible.
    """

    def _calculate_feature_prob(self, feature):
        """Summarise one feature column as (mean, standard deviation)."""
        return (np.mean(feature), np.std(feature))

    def _prob_gaussian(self, mu, sigma, x):
        """Gaussian probability density at x for N(mu, sigma^2)."""
        coefficient = 1.0 / (sigma * np.sqrt(2 * np.pi))
        exponent = -((x - mu) ** 2) / (2 * sigma ** 2)
        return coefficient * np.exp(exponent)

    def _get_xj_prob(self, mu_sigma, target_value):
        """Evaluate the stored (mu, sigma) density at target_value."""
        mu, sigma = mu_sigma
        return self._prob_gaussian(mu, sigma, target_value)



## 2.3 伯努利模型

### 2.3.1 编程实现

## 3 参考文献