# Data Analysis: OLS regressions WITH weather variables

## This notebook:
1. Continues the previous OLS regressions
2. Adds weather variables

In [1]:
import pandas as pd
import statsmodels.formula.api as smf

In [3]:
# Load the modelling table; every regression in this notebook is fitted on it or on a subset of it.
d = pd.read_csv(filepath_or_buffer='merged3_final.csv')

## All records (16-24)

In [4]:
# Previous best model (no weather terms): weekday plus the full
# season x O_hour x Event_type interaction, with 'no-event' as the
# reference level of Event_type.
lm5 = smf.ols(
    formula=(
        "Count ~ weekday + "
        "season * O_hour * C(Event_type, Treatment(reference='no-event'))"
    ),
    data=d,
).fit()
lm5.summary()

# Optional export of the text summary (left disabled):
# with open('lm5.txt', 'w') as f:
#     f.write(lm5.summary().as_text())

0,1,2,3
Dep. Variable:,Count,R-squared:,0.48
Model:,OLS,Adj. R-squared:,0.475
Method:,Least Squares,F-statistic:,93.93
Date:,"Wed, 13 Dec 2017",Prob (F-statistic):,0.0
Time:,13:24:40,Log-Likelihood:,-11391.0
No. Observations:,2879,AIC:,22840.0
Df Residuals:,2850,BIC:,23010.0
Df Model:,28,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-17.3744,6.823,-2.547,0.011,-30.752,-3.996
weekday[T.True],1.3571,0.545,2.489,0.013,0.288,2.426
"C(Event_type, Treatment(reference='no-event'))[T.basketball]",30.7881,12.807,2.404,0.016,5.677,55.900
"C(Event_type, Treatment(reference='no-event'))[T.boxing]",84.3749,35.322,2.389,0.017,15.116,153.634
"C(Event_type, Treatment(reference='no-event'))[T.concert]",-27.8251,22.024,-1.263,0.207,-71.010,15.359
"C(Event_type, Treatment(reference='no-event'))[T.family]",88.5216,51.958,1.704,0.089,-13.357,190.400
"C(Event_type, Treatment(reference='no-event'))[T.hockey]",77.9004,18.918,4.118,0.000,40.805,114.996
"C(Event_type, Treatment(reference='no-event'))[T.other]",20.3641,40.326,0.505,0.614,-58.706,99.434
season,31.7726,2.563,12.397,0.000,26.747,36.798

0,1,2,3
Omnibus:,609.727,Durbin-Watson:,0.739
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1814.148
Skew:,1.085,Prob(JB):,0.0
Kurtosis:,6.227,Cond. No.,13200.0


In [5]:
# Same specification as lm5, with precipitation and temperature added
# as main effects.
lm6 = smf.ols(
    formula=(
        "Count ~ weekday + precipitation + temperature + "
        "season * O_hour * C(Event_type, Treatment(reference='no-event'))"
    ),
    data=d,
).fit()
lm6.summary()

# Optional export of the text summary (left disabled):
# with open('lm6.txt', 'w') as f:
#     f.write(lm6.summary().as_text())

0,1,2,3
Dep. Variable:,Count,R-squared:,0.487
Model:,OLS,Adj. R-squared:,0.482
Method:,Least Squares,F-statistic:,90.24
Date:,"Wed, 13 Dec 2017",Prob (F-statistic):,0.0
Time:,13:25:05,Log-Likelihood:,-11371.0
No. Observations:,2879,AIC:,22800.0
Df Residuals:,2848,BIC:,22990.0
Df Model:,30,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-15.8495,6.803,-2.330,0.020,-29.189,-2.510
weekday[T.True],1.3963,0.542,2.578,0.010,0.334,2.458
"C(Event_type, Treatment(reference='no-event'))[T.basketball]",30.0250,12.722,2.360,0.018,5.080,54.970
"C(Event_type, Treatment(reference='no-event'))[T.boxing]",84.2040,35.082,2.400,0.016,15.415,152.993
"C(Event_type, Treatment(reference='no-event'))[T.concert]",-28.7988,21.876,-1.316,0.188,-71.692,14.095
"C(Event_type, Treatment(reference='no-event'))[T.family]",88.4005,51.608,1.713,0.087,-12.792,189.593
"C(Event_type, Treatment(reference='no-event'))[T.hockey]",77.6330,18.791,4.131,0.000,40.788,114.478
"C(Event_type, Treatment(reference='no-event'))[T.other]",19.0004,40.055,0.474,0.635,-59.539,97.540
precipitation,-5.1557,0.834,-6.182,0.000,-6.791,-3.520

0,1,2,3
Omnibus:,614.19,Durbin-Watson:,0.751
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1765.516
Skew:,1.105,Prob(JB):,0.0
Kurtosis:,6.136,Cond. No.,18600.0


## Subset records (19-24)

In [6]:
# Keep only rows whose O_hour is strictly greater than 18.
d2 = d.loc[d['O_hour'] > 18]

In [7]:
# Weather-augmented model (same formula as lm6) refitted on the O_hour > 18 subset.
lm7 = smf.ols(
    formula=(
        "Count ~ weekday + precipitation + temperature + "
        "season * O_hour * C(Event_type, Treatment(reference='no-event'))"
    ),
    data=d2,
).fit()

# Save the plain-text summary. The context manager closes the file even
# if the write raises, which the manual open()/close() pair did not.
with open('lm7.txt', 'w') as f:
    f.write(lm7.summary().as_text())

# Jupyter renders only a cell's last expression, so the summary goes last;
# placed before the file write, its result was silently discarded.
lm7.summary()

## Subset records (19-24; excluding 'other' events)

In [8]:
# Drop the rows whose Event_type is 'other', then show the row counts
# before and after the filter.
d3 = d2.loc[d2['Event_type'].ne('other')]
(len(d2), len(d3))

(1795, 1760)

In [9]:
# Same formula as lm7, fitted on d3 (the subset without 'other' events).
lm8 = smf.ols(
    formula=(
        "Count ~ weekday + precipitation + temperature + "
        "season * O_hour * C(Event_type, Treatment(reference='no-event'))"
    ),
    data=d3,
).fit()

# Save the plain-text summary. The context manager closes the file even
# if the write raises, which the manual open()/close() pair did not.
with open('lm8.txt', 'w') as f:
    f.write(lm8.summary().as_text())

# Jupyter renders only a cell's last expression, so the summary goes last;
# placed before the file write, its result was silently discarded.
lm8.summary()

In [10]:
# Season enters as a main effect only (no interaction with hour or event
# type); O_hour is still interacted with Event_type.
lm9 = smf.ols(
    formula=(
        "Count ~ weekday + precipitation + temperature + season + "
        "O_hour * C(Event_type, Treatment(reference='no-event'))"
    ),
    data=d3,
).fit()

# Save the plain-text summary. The context manager closes the file even
# if the write raises, which the manual open()/close() pair did not.
with open('lm9.txt', 'w') as f:
    f.write(lm9.summary().as_text())

# Jupyter renders only a cell's last expression, so the summary goes last;
# placed before the file write, its result was silently discarded.
lm9.summary()

In [11]:
# Two separate two-way interactions with Event_type (season x event and
# O_hour x event) instead of the three-way season x O_hour x event term.
lm10 = smf.ols(
    formula=(
        "Count ~ weekday + precipitation + temperature + "
        "season * C(Event_type, Treatment(reference='no-event')) + "
        "O_hour * C(Event_type, Treatment(reference='no-event'))"
    ),
    data=d3,
).fit()
lm10.summary()

# Optional export of the text summary (left disabled):
# with open('lm10.txt', 'w') as f:
#     f.write(lm10.summary().as_text())

0,1,2,3
Dep. Variable:,Count,R-squared:,0.549
Model:,OLS,Adj. R-squared:,0.544
Method:,Least Squares,F-statistic:,106.0
Date:,"Wed, 13 Dec 2017",Prob (F-statistic):,3.76e-283
Time:,13:26:40,Log-Likelihood:,-6510.2
No. Observations:,1760,AIC:,13060.0
Df Residuals:,1739,BIC:,13180.0
Df Model:,20,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,81.6046,4.452,18.330,0.000,72.873,90.337
weekday[T.True],4.9184,0.540,9.100,0.000,3.858,5.978
"C(Event_type, Treatment(reference='no-event'))[T.basketball]",-2.7380,10.446,-0.262,0.793,-23.226,17.750
"C(Event_type, Treatment(reference='no-event'))[T.boxing]",-28.9402,23.858,-1.213,0.225,-75.734,17.853
"C(Event_type, Treatment(reference='no-event'))[T.concert]",-11.7123,12.935,-0.905,0.365,-37.083,13.658
"C(Event_type, Treatment(reference='no-event'))[T.family]",72.2218,20.245,3.567,0.000,32.515,111.928
"C(Event_type, Treatment(reference='no-event'))[T.hockey]",70.2299,13.695,5.128,0.000,43.370,97.090
precipitation,-3.5322,0.846,-4.174,0.000,-5.192,-1.873
temperature,0.0070,0.015,0.461,0.645,-0.023,0.037

0,1,2,3
Omnibus:,343.723,Durbin-Watson:,1.113
Prob(Omnibus):,0.0,Jarque-Bera (JB):,826.255
Skew:,1.067,Prob(JB):,3.8099999999999997e-180
Kurtosis:,5.59,Cond. No.,6510.0


In [12]:
# Final simplified model: main effects only, no interaction terms.
lm11 = smf.ols(
    formula=(
        "Count ~ season + precipitation + temperature + weekday + O_hour + "
        "C(Event_type, Treatment(reference='no-event'))"
    ),
    data=d3,
).fit()

# Save the plain-text summary. The context manager closes the file even
# if the write raises, which the manual open()/close() pair did not.
with open('lm11.txt', 'w') as f:
    f.write(lm11.summary().as_text())

# Jupyter renders only a cell's last expression, so the summary goes last;
# placed before the file write, its result was silently discarded.
lm11.summary()