-
Notifications
You must be signed in to change notification settings - Fork 17
/
Copy pathPoisson Regression.py
44 lines (34 loc) · 1.29 KB
/
Poisson Regression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
# Poisson GLM of `num_awards` on standardised `math` and the `prog` category.
# Data set / background:
# http://staff.bath.ac.uk/pssiw/stats2/page16/page16.html
df = pd.read_csv("../data/awards.csv", index_col=0)
print(df.head())
print(df.describe())

# One-hot encode `prog`. dtype=float keeps the indicator columns numeric:
# pandas >= 2.0 returns bool dummies by default, and mixing bool with the
# float `math` column would turn the design matrix below into an object array.
df = pd.get_dummies(df, columns=["prog"], dtype=float)
# Drop the prog_1 indicator so the remaining dummies are not collinear with
# the intercept column; prog_1 becomes the reference level.
del df['prog_1']
print(df.head())

# Centre `math` and scale it by two (population, ddof=0) standard deviations.
df['math'] = (df['math'] - np.mean(df['math'])) / (2 * np.std(df['math']))
print(df.head())

# Design matrix: a column of ones (intercept) followed by the predictors.
X = np.column_stack(
    (np.ones((df.shape[0], 1)), df[['math', 'prog_2', 'prog_3']]))
y = df['num_awards']

# sm.GLM is the array-based model class; the formula namespace is meant for
# the lowercase, formula-string constructors.
mod = sm.GLM(y, X, family=sm.families.Poisson()).fit()
print(mod.summary())
# Fitted means and per-observation residuals of the fitted GLM.
model_fitted_y = mod.fittedvalues
# Deviance residuals, one per observation. Note that `mod.df_resid` is the
# scalar residual degrees of freedom, not a residual vector; other residual
# types (`resid_pearson`, `resid_response`) are available on the results
# object if a different definition is preferred.
model_residuals = mod.resid_deviance
model_abs_resid = np.abs(model_residuals)

# Residuals-vs-fitted diagnostic, adapted from
# https://medium.com/@emredjan/emulating-r-regression-plots-in-python-43741952c034
plot_lm_1 = plt.figure(1)
plot_lm_1.set_figheight(8)
plot_lm_1.set_figwidth(12)
# Create the Axes explicitly; assigning into `figure.axes[...]` only mutates
# a temporary list and does not attach anything to the figure.
residual_ax = plot_lm_1.add_subplot(111)
# x / y / data must be passed by keyword on seaborn >= 0.12.
# residplot regresses y on x itself and scatters the residuals of that fit,
# with a lowess smoother drawn through them.
sns.residplot(x=model_fitted_y, y='num_awards', data=df,
              lowess=True,
              scatter_kws={'alpha': 0.5},
              line_kws={'color': 'red', 'lw': 2, 'alpha': 0.8},
              ax=residual_ax)
residual_ax.set_title('Residuals vs Fitted')
residual_ax.set_xlabel('Fitted values')
residual_ax.set_ylabel('Residuals')
plt.show()