# Advanced Microeconometrics 2023 Final Exam: Getting Started with the Data

*9:00 am January 13th to 9:00 am January 15th, 2024* 

The code herein is intended only as an aid for reading in the data; you are free to disregard it entirely. 

In [46]:
import pandas as pd
import numpy as np
from scipy.stats import norm

In [36]:
import estimation as est
import LinearModels as lm
import SampleSelection as ssm

# autoreload: reload edited modules before each cell is executed
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Cross Sectional Data

In [37]:
# Read the cross-sectional sample from the CSV file in the working directory.
data = pd.read_csv(filepath_or_buffer='data.csv')

In [38]:
# Pull the three series out of the frame as plain arrays, in one unpacking:
# columns 'y', 'x' and 's' go to the variables of the same name.
y, x, s = (data[column].values for column in ('y', 'x', 's'))

In [39]:
# Turn each series into an (N, 1) column so it can be stacked and indexed
# row-wise in the later cells.
y, x, s = (series.reshape(-1, 1) for series in (y, x, s))

# Q7

In [40]:
# Starting values as returned by the SampleSelection helper for (s, x).
Starting_values = ssm.starting_values(s, x)
# Append one extra column of zeros on the right (one zero per row), giving the
# initial guess handed to the first-step estimator.
start = np.hstack((Starting_values, np.zeros((len(Starting_values), 1))))


In [41]:
# First step: hand the criterion ssm.q, the starting values `start` and the
# data (s as outcome, x as regressor) to est.estimate. The captured output
# below reports a numerical optimizer terminating successfully.
# cov_type='Outer Product' selects the covariance option by name — presumably
# the outer-product-of-gradients estimator; confirm in estimation.py.
first_step = est.estimate(ssm.q, start, s, x, cov_type='Outer Product')

Optimization terminated successfully.
         Current function value: 0.429875
         Iterations: 10
         Function evaluations: 33
         Gradient evaluations: 11


In [42]:
# Row labels for the first-step results table, in parameter order.
# NOTE(review): the first coefficient is labelled 'theta' here but is read into
# a variable called `delta` in the Q11 cell — confirm the intended name.
first_step_parlbl = ['theta', 'eta']

In [43]:
# Show the first-step estimates using the parameter labels defined for them.
est.print_table(first_step_parlbl, first_step, title="First step results")

Optimizer succeded after 10 iter. (33 func. evals.). Final criterion:   0.4299.
First step results


Unnamed: 0,theta,se,t
theta,1.5041,0.1064,14.135
eta,0.4888,0.0689,7.093


# Q11

In [64]:
# Extract the two first-step coefficients and build the selection index
# z = delta * x * exp(-eta * x), one value per observation.
delta = first_step['theta'][0]
eta = first_step['theta'][1]
z = delta*x*np.exp(-eta*x)

# Inverse Mills ratio pdf(z)/cdf(z), evaluated in logs.
# For strongly negative z both norm.pdf and norm.cdf underflow to 0, so the
# plain ratio turns into nan (0/0) or inf; logpdf - logcdf stays finite there
# and agrees with the plain ratio wherever the latter is well-defined.
mlambda = np.exp(norm.logpdf(z) - norm.logcdf(z))

# Keep only the selected observations (s == 1).
mlambda = mlambda[s.flatten()==1]

In [73]:
# Restrict x and y to the selected observations (s == 1); the same row mask is
# applied to both so they stay aligned.
x2, y2 = (column[s.flatten()==1] for column in (x, y))

In [74]:
# Regressor matrix for the second step: x in the first column and the inverse
# Mills ratio (a generated regressor) in the second.
ximr = np.concatenate([x2, mlambda], axis=1)

In [75]:
# Regressor labels for the second-step table, in the same order as the
# columns of ximr (x first, then the inverse Mills ratio).
xlbl = ['x', 'IMR']

In [77]:
# Second step: regress y on [x, IMR] using the selected observations only.
# NOTE(review): the IMR column is built from first-step estimates; whether
# lm.estimate's standard errors account for that generated regressor is not
# visible here — confirm before interpreting the t-values.
second_step = lm.estimate(y2, ximr)

In [79]:
# Print the second-step results table; ('y', xlbl) supplies the label of the
# dependent variable and the labels of the regressors.
lm.print_table(('y', xlbl), second_step, title="Second step results")


Second step results
Dependent variable: y

           Beta         Se    t-values
---  ----------  ---------  ----------
x     2.09112    0.0662515   31.5633
IMR  -0.0219985  0.0928763   -0.236858
R² = 0.625
σ² = 1.439
