-
Notifications
You must be signed in to change notification settings - Fork 1
/
nbinom_fit.py
executable file
·100 lines (81 loc) · 3.21 KB
/
nbinom_fit.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#!/usr/bin/env python
# fit_nbinom
# Copyright (C) 2014 Gokcen Eraslan
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import print_function

import numpy as np
# NOTE(review): factorial moved — scipy.misc.factorial was removed in SciPy 1.3
# and "scipy.factorial" never existed as an importable module; it lives in
# scipy.special in every modern SciPy.
from scipy.special import factorial
from scipy.special import gammaln
from scipy.special import psi
from scipy.optimize import fmin_l_bfgs_b as optim
# Machine epsilon for double precision: used to keep probabilities strictly
# inside (0, 1) and the size parameter strictly positive during optimization.
# np.float (a deprecated alias of the builtin float) was removed in NumPy 1.24.
infinitesimal = np.finfo(float).eps
def log_likelihood(params, *args):
    """Negative log-likelihood of the negative binomial distribution.

    Parameters
    ----------
    params : sequence of two floats
        ``(r, p)`` — size ``r > 0`` and success probability ``0 < p <= 1``.
    args : tuple
        ``args[0]`` is a numpy array of non-negative integer observations.

    Returns
    -------
    float
        The negative log-likelihood (so it can be minimized directly).
    """
    r, p = params
    X = args[0]
    N = X.size
    # Clamp value used for p inside log(1 - p) so the log stays finite at p == 1.
    eps = np.finfo(float).eps

    # MLE formulation from Wikipedia:
    # http://en.wikipedia.org/wiki/Negative_binomial_distribution#Maximum_likelihood_estimation
    # log(X!) is computed as gammaln(X + 1) instead of log(factorial(X)):
    # mathematically identical for integer X, but factorial overflows float64
    # for X >= 171 while gammaln stays finite.
    result = np.sum(gammaln(X + r)) \
        - np.sum(gammaln(X + 1)) \
        - N * (gammaln(r)) \
        + N * r * np.log(p) \
        + np.sum(X * np.log(1 - (p if p < 1 else 1 - eps)))
    return -result
# X is a numpy array representing the data
# initial params is a numpy array representing the initial values of
# size and prob parameters
def nbinom_fit(X, initial_params=None):
    """Fit a negative binomial distribution to data by maximum likelihood.

    Parameters
    ----------
    X : numpy array
        Non-negative integer observations.
    initial_params : numpy array, optional
        Starting point ``[size, prob]`` for the optimizer. When None,
        method-of-moments estimates are used (as in R's ``fitdistr``).

    Returns
    -------
    dict
        ``{'size': r, 'prob': p}`` with ``r > 0`` and ``0 < p <= 1``.
    """
    # Machine epsilon keeps the optimizer's bounds strictly inside the valid
    # parameter region. (np.float was removed in NumPy 1.24; use builtin float.)
    infinitesimal = np.finfo(float).eps

    def log_likelihood(params, *args):
        # Negative log-likelihood; minimized by fmin_l_bfgs_b below.
        r, p = params
        X = args[0]
        N = X.size
        # MLE formula from Wikipedia:
        # http://en.wikipedia.org/wiki/Negative_binomial_distribution#Maximum_likelihood_estimation
        # log(X!) computed as gammaln(X + 1): exact for integer X and, unlike
        # factorial(X), cannot overflow for large counts.
        result = np.sum(gammaln(X + r)) \
            - np.sum(gammaln(X + 1)) \
            - N * (gammaln(r)) \
            + N * r * np.log(p) \
            + np.sum(X * np.log(1 - (p if p < 1 else 1 - infinitesimal)))
        return -result

    def log_likelihood_deriv(params, *args):
        # Analytic gradient of the negative log-likelihood w.r.t. (r, p).
        # Currently unused (approx_grad=1 below) but kept for the fprime option.
        r, p = params
        X = args[0]
        N = X.size
        pderiv = (N * r) / p - np.sum(X) / \
            (1 - (p if p < 1 else 1 - infinitesimal))
        rderiv = np.sum(psi(X + r)) \
            - N * psi(r) \
            + N * np.log(p)
        return np.array([-rderiv, -pderiv])

    if initial_params is None:
        # Method-of-moments starting values (from R's fitdistr function).
        m = np.mean(X)
        v = np.var(X)
        # size = m^2 / (v - m) only makes sense for overdispersed data (v > m);
        # otherwise fall back to an arbitrary positive default.
        size = (m ** 2) / (v - m) if v > m else 10
        # Convert mu/size parameterization to prob/size.
        p0 = size / ((size + m) if size + m != 0 else 1)
        r0 = size
        initial_params = np.array([r0, p0])

    bounds = [(infinitesimal, None), (infinitesimal, 1)]
    optimres = optim(log_likelihood,
                     x0=initial_params,
                     # fprime=log_likelihood_deriv,  # analytic gradient (optional)
                     args=(X,),
                     approx_grad=1,
                     bounds=bounds)
    params = optimres[0]
    return {'size': params[0], 'prob': params[1]}