
# ProblemSet #3

In [362]:
# import packages
import math
import pandas as pd
import numpy as np
import scipy.optimize as opt
import scipy.stats as stats

In [363]:
# Read the data
ps3_data = pd.read_stata('PS3_data.dta')
ps3_data.head(n=5)

Unnamed: 0,id68,year,intid,relhh,hannhrs,wannhrs,hlabinc,wlabinc,nochild,wrace,...,redpregovinc,hsex,wsex,age,wage,hpersno,wpersno,hyrsed,wyrsed,pce
0,1,1967,1,Head,1200.0,2000.0,,,0,,...,5614.0,1.0,2.0,52.0,46.0,1.0,2.0,8.0,8.0,0.0
1,2,1967,2,Head,0.0,0.0,,,0,,...,0.0,1.0,2.0,56.0,57.0,1.0,2.0,3.0,3.0,0.0
2,3,1967,3,Head,0.0,0.0,,,0,,...,0.0,1.0,2.0,77.0,64.0,1.0,2.0,,3.0,0.0
3,4,1967,4,Head,1560.0,0.0,,,6,1.0,...,3280.0,1.0,2.0,45.0,44.0,1.0,2.0,8.0,5.0,0.0
4,5,1967,5,Head,2500.0,2000.0,,,3,1.0,...,7900.0,1.0,2.0,24.0,22.0,1.0,2.0,10.0,9.0,0.0


In [365]:
#1.clean data
# male heads of household
ps3_data = ps3_data[(ps3_data['hsex'] == 1.0)]
# age in the range of [25,60]
ps3_data = ps3_data[(ps3_data['age'] >= 25) & (ps3_data['age'] <= 60)]
# annual income /annual hour > $7/hr
ps3_data = ps3_data[(ps3_data['hlabinc']/ps3_data['hannhrs'] >= 7)]
# hyrsed is not null
ps3_data=ps3_data[ps3_data.hyrsed.isnull() != True ]

In [366]:
#2. create variables
ps3_data['lnwage'] = np.log(ps3_data['hlabinc'])
ps3_data['Black'] = ps3_data['hrace'] == 2
ps3_data['Hispanic'] = ps3_data['hrace'] == 5
ps3_data['OtherRace'] = (ps3_data['hrace'] != 1) & (ps3_data['hrace'] != 2) & (ps3_data['hrace'] != 5)
# beta is dictionary to store beta1 for different years
beta = {'beta': []}

In [370]:
#3.Maximum Likelihood Estimator
# I manualy change year from 1971 to 2000 and save beta_1 to dictionary beta
ps_data=ps3_data[ps3_data['year']==2000]

def mle(params):
        a = params[0]
        b = params[1]
        c = params[2]
        d = params[3]
        e = params[4]
        f = params[5]
        sigma = params[6]

        ps_data['yhat'] = a + b * ps_data['hyrsed'] + c*ps_data['age']+d*ps_data['Black']+e*ps_data['Hispanic']+f*ps_data['OtherRace']
        L = -np.sum(stats.norm.logpdf(ps_data['lnwage'], loc=ps_data['yhat'], scale=sigma))
        return L
    
# initial parameter guesses 
initialparams = [0.2,0.5,0.1,0.4,0,0.3,1]

# minimize
results = opt.minimize(mle, initialparams, method='nelder-mead')

#add to dictionary beta
beta['beta'].append(results['x'][1])
beta

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


{'beta': [0.071707741377173145,
  0.082042283955572323,
  0.10995402582884446,
  0.11644408223335215]}

In [371]:
# check with ols regression results
# results are similiar
from statsmodels.formula.api import ols
fit = ols('lnwage ~ hyrsed+age+C(Black)+C(Hispanic)+C(OtherRace)', data=ps_data).fit()
fit.summary()

0,1,2,3
Dep. Variable:,lnwage,R-squared:,0.179
Model:,OLS,Adj. R-squared:,0.177
Method:,Least Squares,F-statistic:,140.8
Date:,"Mon, 30 Sep 2019",Prob (F-statistic):,5.42e-109
Time:,21:45:43,Log-Likelihood:,-2477.9
No. Observations:,2596,AIC:,4966.0
Df Residuals:,2591,BIC:,4995.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,8.7485,0.094,93.357,0.000,8.565,8.932
C(Black)[T.True],-0.3076,0.056,-5.522,0.000,-0.417,-0.198
C(Hispanic)[T.True],1.097e-14,1.22e-16,89.975,0.000,1.07e-14,1.12e-14
C(OtherRace)[T.True],-0.0981,0.064,-1.526,0.127,-0.224,0.028
hyrsed,0.1164,0.006,19.413,0.000,0.105,0.128
age,0.0108,0.001,8.143,0.000,0.008,0.013

0,1,2,3
Omnibus:,332.818,Durbin-Watson:,1.994
Prob(Omnibus):,0.0,Jarque-Bera (JB):,2173.765
Skew:,-0.413,Prob(JB):,0.0
Kurtosis:,7.406,Cond. No.,1.15e+19


In [None]:
# add bounds
bnds = ((None, None), (None, None), (None, None),
        (None, None), (None, None), (None, None),(0, None))
results2 =opt.minimize(mle, initialparams,method='L-BFGS-B',bounds=bnds)
results2['x']
results3 = opt.minimize(mle, initialparams,method='SLSQP',bounds=bnds)
results3['x']

# 4 
$\beta_1$ means that if individual spends 1 more year in school and keep age and race constant, his/her annual income increases by $ \beta_1 $% on average.
Becasue $ \beta_1 $ increases over time, returns to education increase over time.