Skip to content

Commit

Permalink
Merge commit 'v0.5.0-13-g8e07d34' into debian
Browse files Browse the repository at this point in the history
* commit 'v0.5.0-13-g8e07d34':
  Backport PR statsmodels#1200: BLD: do not install *.pyx *.c  MANIFEST.in
  Backport PR statsmodels#1157: Tst precision master
  Backport PR statsmodels#1149: BUG: Fix small data issues for ARIMA.
  Backport PR statsmodels#1125: REF/BUG: Some GLM cleanup. Used trimmed results in NegativeBinomial variance.
  Backport PR statsmodels#1124: BUG: Fix ARIMA prediction when fit without a trend.
  Backport PR statsmodels#1117: Update ex_arma2.py
  Backport PR statsmodels#1089: ENH: exp(poisson.logpmf()) for poisson better behaved.
  Backport PR statsmodels#1077: BUG: Allow 1d exog in ARMAX forecasting.
  Backport PR statsmodels#1075: BLD: Fix build issue on some versions of easy_install.
  Backport PR statsmodels#1071: Update setup.py to fix broken install on OSX
  Backport PR statsmodels#1057: COMPAT: Fix py3 caching for get_rdatasets.
  BUG: fix predict (was refactoring victim)
  • Loading branch information
yarikoptic committed Nov 29, 2013
2 parents 9f2a17d + 8e07d34 commit 985a57e
Show file tree
Hide file tree
Showing 17 changed files with 197 additions and 54 deletions.
4 changes: 2 additions & 2 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
global-include *.csv *.py *.txt *.pyx *.c
global-include *.csv *.py *.txt
#scikits*.*
include MANIFEST.in

Expand Down Expand Up @@ -44,4 +44,4 @@ include statsmodels/stats/libqsturng/LICENSE.txt
include statsmodels/regression/tests/results/leverage_influence_ols_nostars.txt


global-exclude *~ *.swp *.pyc *.bak
global-exclude *~ *.swp *.pyc *.bak *.pyx
2 changes: 1 addition & 1 deletion docs/source/gettingstarted.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ provides labelled arrays of (potentially heterogenous) data, similar to the
comma-separated values file to a ``DataFrame`` object.

`patsy <https://github.com/pydata/patsy>`_ is a Python library for describing
satistical models and building `Design Matrices
statistical models and building `Design Matrices
<http://en.wikipedia.org/wiki/Design_matrix>`_ using ``R``-like formulas.

Data
Expand Down
6 changes: 3 additions & 3 deletions examples/tsa/ex_arma2.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@
# The conventions of the arma_generate function require that we specify a
# 1 for the zero-lag of the AR and MA parameters and that the AR parameters
# be negated.
arparams = np.r_[1, -arparams]
maparam = np.r_[1, maparams]
ar = np.r_[1, -arparams]
ma = np.r_[1, maparams]
nobs = 250
y = arma_generate_sample(arparams, maparams, nobs)
y = arma_generate_sample(ar, ma, nobs)

# Now, optionally, we can add some dates information. For this example,
# we'll use a pandas time series.
Expand Down
9 changes: 7 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@
import subprocess
import re

# temporarily redirect the matplotlib config directory: when imported,
# matplotlib tests whether that directory is writeable, which results in a
# sandbox error in certain easy_install versions
os.environ["MPLCONFIGDIR"] = "."

# may need to work around setuptools bug by providing a fake Pyrex
try:
import Cython
Expand Down Expand Up @@ -436,12 +441,12 @@ def get_data_files():
continue
path = pjoin(root, i)
if os.path.isdir(path):
data_files.update({relpath(path).replace(sep, ".") : ["*.csv",
data_files.update({relpath(path, start=curdir).replace(sep, ".") : ["*.csv",
"*.dta"]})
# add all the tests and results files
for r, ds, fs in os.walk(pjoin(curdir, "statsmodels")):
if r.endswith('results') and 'sandbox' not in r:
data_files.update({relpath(r).replace(sep, ".") : ["*.csv",
data_files.update({relpath(r, start=curdir).replace(sep, ".") : ["*.csv",
"*.txt"]})

return data_files
Expand Down
9 changes: 8 additions & 1 deletion statsmodels/base/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,14 @@ def _get_names(self, arr):
def _get_yarr(self, endog):
if data_util._is_structured_ndarray(endog):
endog = data_util.struct_to_ndarray(endog)
return np.asarray(endog).squeeze()
endog = np.asarray(endog)
if len(endog) == 1: # never squeeze to a scalar
if endog.ndim == 1:
return endog
elif endog.ndim > 1:
return np.asarray([endog.squeeze()])

return endog.squeeze()

def _get_xarr(self, exog):
if data_util._is_structured_ndarray(exog):
Expand Down
11 changes: 7 additions & 4 deletions statsmodels/datasets/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,9 @@ def _cache_it(data, cache_path):
if sys.version_info[0] >= 3:
# for some reason encode("zip") won't work for me in Python 3?
import zlib
open(cache_path, "wb").write(zlib.compress(pickle.dumps(data)))
# use pickle protocol 2 so the cache can be opened with python 2.x even if it was written in 3.x
open(cache_path, "wb").write(zlib.compress(pickle.dumps(data,
protocol=2)))
else:
open(cache_path, "wb").write(pickle.dumps(data).encode("zip"))

Expand All @@ -141,7 +143,8 @@ def _open_cache(cache_path):
# Python 3 build
import zlib
data = zlib.decompress(open(cache_path, 'rb').read())
data = pickle.loads(data)
# return as bytes object encoded in utf-8 for cross-compat of cached data
data = pickle.loads(data).encode('utf-8')
else:
data = open(cache_path, 'rb').read().decode('zip')
data = pickle.loads(data)
Expand Down Expand Up @@ -181,9 +184,9 @@ def _get_data(base_url, dataname, cache, extension="csv"):
else:
raise err

#Python 3, don't think there will be any unicode in r datasets
#Python 3, always decode as unicode
if sys.version[0] == '3': # pragma: no cover
data = data.decode('ascii', errors='strict')
data = data.decode('utf-8', errors='strict')
return StringIO(data), from_cache


Expand Down
2 changes: 1 addition & 1 deletion statsmodels/discrete/discrete_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -840,7 +840,7 @@ def pdf(self, X):
The parameter `X` is :math:`x_{i}\\beta` in the above formula.
"""
y = self.endog
return stats.poisson.pmf(y, np.exp(X))
return np.exp(stats.poisson.logpmf(y, np.exp(X)))

def loglike(self, params):
"""
Expand Down
8 changes: 4 additions & 4 deletions statsmodels/genmod/families/family.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from scipy.stats import ss
import links as L
import varfuncs as V
FLOAT_EPS = np.finfo(float).eps

class Family(object):

Expand All @@ -28,7 +29,6 @@ class Family(object):
#TODO: change these class attributes, use valid somewhere...
valid = [-np.inf, np.inf]

tol = 1.0e-05
links = []

def _setlink(self, link):
Expand Down Expand Up @@ -563,7 +563,7 @@ def _clean(self, x):
possible that other families might need a check for validity of the
domain.
"""
return np.clip(x, 1.0e-10, np.inf)
return np.clip(x, FLOAT_EPS, np.inf)

def deviance(self, Y, mu, scale=1.):
"""
Expand Down Expand Up @@ -696,7 +696,7 @@ class Binomial(Family):
endog for Binomial can be specified in one of three ways.
"""

links = [L.logit, L.probit, L.cauchy, L.log, L.cloglog]
links = [L.logit, L.probit, L.cauchy, L.log, L.cloglog, L.identity]
variance = V.binary # this is not used below in an effort to include n

def __init__(self, link=L.logit): #, n=1.):
Expand Down Expand Up @@ -1102,7 +1102,7 @@ def _clean(self, x):
possible that other families might need a check for validity of the
domain.
"""
return np.clip(x, 1.0e-10, np.inf)
return np.clip(x, FLOAT_EPS, np.inf)

def deviance(self, Y, mu, scale=1.):
"""
Expand Down
19 changes: 7 additions & 12 deletions statsmodels/genmod/families/links.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import numpy as np
import scipy.stats
FLOAT_EPS = np.finfo(float).eps

#TODO: are the instance actually "aliases"
# I used this terminology in varfuncs as well -ss
Expand Down Expand Up @@ -70,17 +71,15 @@ class Logit(Link):
Notes
-----
call and derivative use a private method _clean to trim p by
1e-10 so that p is in (0,1)
machine epsilon so that p is in (0,1)
Alias of Logit:
logit = Logit()
"""

tol = 1.0e-10

def _clean(self, p):
"""
Clip logistic values to range (tol, 1-tol)
Clip logistic values to range (eps, 1-eps)
Parameters
-----------
Expand All @@ -92,7 +91,7 @@ def _clean(self, p):
pclip : array
Clipped probabilities
"""
return np.clip(p, Logit.tol, 1. - Logit.tol)
return np.clip(p, FLOAT_EPS, 1. - FLOAT_EPS)

def __call__(self, p):
"""
Expand Down Expand Up @@ -310,13 +309,11 @@ class Log(Link):
Notes
-----
call and derivative call a private method _clean to trim the data by
1e-10 so that p is in (0,1). log is an alias of Log.
machine epsilon so that p is in (0,inf). log is an alias of Log.
"""

tol = 1.0e-10

def _clean(self, x):
return np.clip(x, Logit.tol, np.inf)
return np.clip(x, FLOAT_EPS, np.inf)

def __call__(self, p, **extra):
"""
Expand Down Expand Up @@ -599,13 +596,11 @@ class NegativeBinomial(object):
values are usually assumed to be in (.01,2).
'''

tol = 1.0e-10

def __init__(self, alpha=1.):
self.alpha = alpha

def _clean(self, x):
return np.clip(x, NegativeBinomial.tol, np.inf)
return np.clip(x, FLOAT_EPS, np.inf)

def __call__(self, x):
'''
Expand Down
17 changes: 8 additions & 9 deletions statsmodels/genmod/families/varfuncs.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
__docformat__ = 'restructuredtext'

import numpy as np
FLOAT_EPS = np.finfo(float).eps

class VarianceFunction(object):
"""
Expand Down Expand Up @@ -147,16 +148,15 @@ class Binomial(object):
Alias for Binomial:
binary = Binomial()
A private method _clean trims the data by 1e-10 so that p is in (0,1)
A private method _clean trims the data by machine epsilon so that p is
in (0,1)
"""

tol = 1.0e-10

def __init__(self, n=1):
self.n = n

def _clean(self, p):
return np.clip(p, Binomial.tol, 1 - Binomial.tol)
return np.clip(p, FLOAT_EPS, 1 - FLOAT_EPS)

def __call__(self, mu):
"""
Expand Down Expand Up @@ -208,16 +208,15 @@ class NegativeBinomial(object):
Alias for NegativeBinomial:
nbinom = NegativeBinomial()
A private method _clean trims the data by 1e-10 so that p is in (0,inf)
A private method _clean trims the data by machine epsilon so that p is
in (0,inf)
'''

tol = 1.0e-10

def __init__(self, alpha=1.):
self.alpha = alpha

def _clean(self, p):
return np.clip(p, NegativeBinomial.tol, np.inf)
return np.clip(p, FLOAT_EPS, np.inf)

def __call__(self, mu):
"""
Expand All @@ -234,7 +233,7 @@ def __call__(self, mu):
variance = mu + alpha*mu**2
"""
p = self._clean(mu)
return mu + self.alpha*mu**2
return p + self.alpha*p**2

nbinom = NegativeBinomial()
nbinom.__doc__ = """
Expand Down
6 changes: 5 additions & 1 deletion statsmodels/regression/linear_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,11 @@ def __init__(self, endog, exog, weights=1., missing='none', hasconst=None):
weights = np.array(weights)
if weights.shape == ():
weights = np.repeat(weights, len(endog))
weights = weights.squeeze()
# handle case that endog might be of len == 1
if len(weights) == 1:
weights = np.array([weights.squeeze()])
else:
weights = weights.squeeze()
super(WLS, self).__init__(endog, exog, missing=missing,
weights=weights, hasconst=hasconst)
nobs = self.exog.shape[0]
Expand Down
5 changes: 4 additions & 1 deletion statsmodels/sandbox/regression/gmm.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def fit(self):
return lfit

#copied from GLS, because I currently subclass LikelihoodModel and not GLS
def predict(self, exog, params=None):
def predict(self, params, exog=None):
"""
Return linear predicted values from a design matrix.
Expand All @@ -161,6 +161,9 @@ def predict(self, exog, params=None):
-----
If the model has not yet been fit, params is not optional.
"""
if exog is None:
exog = self.exog
return np.dot(exog, params)
#JP: this doesn't look correct for GLMAR
#SS: it needs its own predict method
if self._results is None and params is None:
Expand Down
6 changes: 3 additions & 3 deletions statsmodels/sandbox/tests/test_pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,9 @@ def test_pca_svd():
xred_svd, factors_svd, evals_svd, evecs_svd = pcasvd(xf, keepdim=0)
assert_array_almost_equal(evals_svd, evals, 14)
msign = (evecs/evecs_svd)[0]
assert_array_almost_equal(msign*evecs_svd, evecs, 14)
assert_array_almost_equal(msign*factors_svd, factors, 13)
assert_array_almost_equal(xred_svd, xreduced, 14)
assert_array_almost_equal(msign*evecs_svd, evecs, 13)
assert_array_almost_equal(msign*factors_svd, factors, 12)
assert_array_almost_equal(xred_svd, xreduced, 13)

pcares = pca(xf, keepdim=2)
pcasvdres = pcasvd(xf, keepdim=2)
Expand Down
6 changes: 3 additions & 3 deletions statsmodels/stats/tests/test_diagnostic.py
Original file line number Diff line number Diff line change
Expand Up @@ -554,10 +554,10 @@ def test_normality(self):
lf2 = smsdia.lillifors(res.resid**2)
lf3 = smsdia.lillifors(res.resid[:20])

compare_t_est(lf1, lillifors1, decimal=(15, 14))
compare_t_est(lf2, lillifors2, decimal=(15, 15)) #pvalue very small
compare_t_est(lf1, lillifors1, decimal=(14, 14))
compare_t_est(lf2, lillifors2, decimal=(14, 14)) #pvalue very small
assert_approx_equal(lf2[1], lillifors2['pvalue'], significant=10)
compare_t_est(lf3, lillifors3, decimal=(15, 1))
compare_t_est(lf3, lillifors3, decimal=(14, 1))
#R uses different approximation for pvalue in last case

#> ad = ad.test(residuals(fm))
Expand Down
Loading

0 comments on commit 985a57e

Please sign in to comment.