# Production Technology

The dataset contains `N = 441` firms observed over `T = 12` years, 1968-1979. The variables are: 
* `lcap`: Log of capital stock, $k_{it}$ 
* `lemp`: log of employment, $\ell_{it}$ 
* `ldsa`: log of deflated sales, $y_{it}$
* `year`: the calendar year of the observation, `year` $ = 1968, ..., 1979$, 
* `firmid`: anonymized indicator variable for the firm, $i = 1, ..., N$, with $N=441$. 

In [None]:
%load_ext autoreload
%autoreload 2

import pandas as pd 
import numpy as np
import seaborn as sns
import Project_1 as lm
from scipy.stats import chi2
from scipy.stats import f
from scipy.stats import norm
from numpy import linalg as la


In [None]:
# Load the firm panel from the CSV file in the working directory
dat = pd.read_csv('firms.csv')

In [None]:
# Show five randomly drawn rows as a quick sanity check of the import
dat.sample(n=5)

In [None]:
# List the distinct calendar years present in the data
dat['year'].unique()

# Descriptives

In [None]:
# Summary statistics for every numeric column of the panel
dat.describe()

In [None]:
# Histograms of the three log variables (trailing semicolon hides the axes repr)
dat.loc[:, ['lcap', 'lemp', 'ldsa']].hist();

In [None]:
# Log sales against log employment (trailing semicolon hides the axes repr)
sns.scatterplot(data=dat, x='lemp', y='ldsa');

# Converting data to numpy format 

In [None]:
# Shape of the dependent variable once pulled out of pandas
dat['ldsa'].to_numpy().shape

In [None]:
# Panel dimensions: number of distinct firms (N) and distinct years (T)
N = len(dat['firmid'].unique())
T = len(dat['year'].unique())

# A balanced panel has exactly one row per firm-year pair
assert len(dat) == N * T, 'Error: data is not a balanced panel'
print(f'Data has N={N} and T={T}')

Using only the odd years of the provided data

In [None]:
# Keep only the odd calendar years; copy so later edits never touch `dat`
dat_odd_years = dat[dat['year'] % 2 != 0].copy()

# Print the shape explicitly: a bare expression in the middle of a cell is
# evaluated and discarded, so it was never displayed.
print(dat_odd_years.ldsa.values.shape)
dat_odd_years.head(5)


In [None]:
# Recompute the panel dimensions for the odd-year subsample
N = len(dat_odd_years['firmid'].unique())
T = len(dat_odd_years['year'].unique())

# The subsample must still hold one row per firm-year pair
assert len(dat_odd_years) == N * T, 'Error: data is not a balanced panel'
print(f'Data has N={N} and T={T}')

Extract data from `pandas` to `numpy` arrays. 

In [None]:
# Every variable is stored as an (N*T, 1) column vector
col_shape = (N*T, 1)

# Dependent variable: log deflated sales
y = dat_odd_years['ldsa'].to_numpy().reshape(col_shape)

# Column of ones, plus the two regressors: log employment and log capital
ones = np.ones(col_shape)
l = dat_odd_years['lemp'].to_numpy().reshape(col_shape)
k = dat_odd_years['lcap'].to_numpy().reshape(col_shape)

# Regressor matrix with columns [lemp, lcap]
x = np.concatenate([l, k], axis=1)

In [None]:
# Names used in the result tables; label_x follows the column order of x
label_y = 'Log deflated sales'
label_x = ['log of employment', 'log of adjusted capital stock']

## FE model

In [None]:
# Within transformation matrix: identity minus the T x T averaging matrix.
# lm.perm presumably applies it to each firm's block of T rows - confirm in Project_1.
Q_T = np.eye(T) - np.tile(1/T, (T, T))
y_dot = lm.perm(Q_T, y)
x_dot = lm.perm(Q_T, x)

# Remove the columns that are only zeroes
x_dot, label_x_dot = lm.remove_zero_columns(x_dot, label_x)

# Estimate
# NOTE(review): robust_se is passed as the string 'True', not the bool True -
# confirm how lm.estimate interprets it before changing it.
fe_result = lm.estimate(y_dot, x_dot, transform='fe', T=T, robust_se='True')
lm.print_table((label_y, label_x_dot), fe_result, title="Fixed Effects", floatfmt='.4f')

# Two-sided p-values against the standard normal, computed from the estimates
# instead of t-values typed in from an earlier printout (which go stale when
# the sample changes). Assumes fe_result['cov'] is the covariance matrix behind
# the reported standard errors - confirm in lm.estimate.
fe_t = np.ravel(fe_result['b_hat']) / np.sqrt(np.diag(fe_result['cov']))
p_value_empl, p_value_cap = 2 * norm.sf(np.abs(fe_t))
print(f"P-value of log of employment: {p_value_empl:.4f}")
print(f"P-value of log of capital stock: {p_value_cap:.4f}")

## FD model

In [None]:
# First-difference matrix: rows of (identity - first sub-diagonal) with the
# first row dropped, giving a (T-1) x T matrix.
# lm.perm presumably applies it to each firm's block of T rows - confirm in Project_1.
D_T = (np.eye(T) - np.eye(T, k=-1))[1:]

y_diff = lm.perm(D_T, y)
x_diff = lm.perm(D_T, x)

# Remove the columns that are only zeroes
x_diff, label_x_diff = lm.remove_zero_columns(x_diff, label_x)

# Estimate (T-1 differenced observations per firm)
# NOTE(review): robust_se is passed as the string 'True', not the bool True -
# confirm how lm.estimate interprets it before changing it.
fd_result = lm.estimate(y_diff, x_diff, transform='fd', T=T-1, robust_se='True')
lm.print_table((label_y, label_x_diff), fd_result, title="First Difference", floatfmt='.4f')

# Two-sided p-values against the standard normal, computed from the estimates
# instead of t-values typed in from an earlier printout. Assumes
# fd_result['cov'] is the covariance matrix behind the reported standard
# errors - confirm in lm.estimate.
fd_t = np.ravel(fd_result['b_hat']) / np.sqrt(np.diag(fd_result['cov']))
p_value_empl, p_value_cap = 2 * norm.sf(np.abs(fd_t))
print(f"P-value of log of employment: {p_value_empl:.4f}")
print(f"P-value of log of capital stock: {p_value_cap:.4f}")

## Test for strict exogeneity

In [None]:
# Lead matrix: identity shifted one column to the right, last row dropped,
# so each kept row picks out the next period's value
F_T = np.eye(T, k=1)[:-1]

# Selection matrix: identity with the last row dropped, i.e. all but the
# final period are kept
I_T = np.eye(T)[:-1]

y_exo = lm.perm(I_T, y)
x_exo = lm.perm(I_T, x)

# Demeaning matrix for the shortened panel of T-1 periods
T_short = T - 1
Q_T = np.eye(T_short) - np.full((T_short, T_short), 1/T_short)
yw_exo = lm.perm(Q_T, y_exo)

#### Testing FE.1 ####

In [None]:
# Lead employment
empl_lead = lm.perm(F_T, x[:, 0].reshape(-1, 1))

# Add empl_lead to x_exo
x_exo_empl = np.hstack((x_exo, empl_lead))

# Within transform the data
xw_exo_empl = lm.perm(Q_T, x_exo_empl)

# Estimate model
# NOTE(review): robust_se is passed as the string 'True', not the bool True -
# confirm how lm.estimate interprets it before changing it.
exo_test_empl = lm.estimate(yw_exo, xw_exo_empl, T=T-1, transform='fe', robust_se='True')

# Print results
label_exo_empl = label_x + ['Employment lead']
lm.print_table((label_y, label_exo_empl), exo_test_empl, title='Exogeneity FE test', floatfmt='.4f')

# Two-sided p-values against the standard normal, computed from the estimates
# instead of t-values typed in from an earlier printout. Coefficient order
# follows x_exo_empl: employment, capital, employment lead. Assumes the 'cov'
# entry is the covariance matrix behind the reported standard errors - confirm
# in lm.estimate.
t_exo_empl = np.ravel(exo_test_empl['b_hat']) / np.sqrt(np.diag(exo_test_empl['cov']))
p_value_empl, p_value_cap, p_value_lead_empl = 2 * norm.sf(np.abs(t_exo_empl))
print(f"P-value of log of employment: {p_value_empl:.4f}")
print(f"P-value of log of capital stock: {p_value_cap:.4f}")
print(f"P-value of lead of log of employment: {p_value_lead_empl:.4f}")


In [None]:
# Lead capital
cap_lead = lm.perm(F_T, x[:, 1].reshape(-1, 1))

# Add cap_lead to x_exo
x_exo_cap = np.hstack((x_exo, cap_lead))

# Within transform the data
xw_exo_cap = lm.perm(Q_T, x_exo_cap)

# Estimate model
# NOTE(review): robust_se is passed as the string 'True', not the bool True -
# confirm how lm.estimate interprets it before changing it.
exo_test_cap = lm.estimate(yw_exo, xw_exo_cap, T=T-1, transform='fe', robust_se='True')

# Print results
label_exo_cap = label_x + ['Capital lead']
lm.print_table((label_y, label_exo_cap), exo_test_cap, title='Exogeneity FE test', floatfmt='.4f')

# Two-sided p-values against the standard normal, computed from the estimates
# instead of t-values typed in from an earlier printout. Coefficient order
# follows x_exo_cap: employment, capital, capital lead. Assumes the 'cov'
# entry is the covariance matrix behind the reported standard errors - confirm
# in lm.estimate.
t_exo_cap = np.ravel(exo_test_cap['b_hat']) / np.sqrt(np.diag(exo_test_cap['cov']))
p_value_empl, p_value_cap, p_value_lead_cap = 2 * norm.sf(np.abs(t_exo_cap))
print(f"P-value of log of employment: {p_value_empl:.4f}")
print(f"P-value of log of capital stock: {p_value_cap:.4f}")
print(f"P-value of lead of log of capital: {p_value_lead_cap:.4f}")


In [None]:
# Add both leads to x_exo
x_exo_joint = np.hstack((x_exo, empl_lead, cap_lead))

# Within transform the data
xw_exo_joint = lm.perm(Q_T, x_exo_joint)

# Estimate model
# NOTE(review): robust_se is passed as the string 'True', not the bool True -
# confirm how lm.estimate interprets it before changing it.
exo_test_joint = lm.estimate(yw_exo, xw_exo_joint, T=T-1, transform='fe', robust_se='True')

# Print results
label_exo_joint = label_x + ['Employment lead'] + ['Capital lead']
lm.print_table((label_y, label_exo_joint), exo_test_joint, title='Exogeneity FE test', floatfmt='.4f')

# Two-sided p-values against the standard normal, computed from the estimates
# instead of t-values typed in from an earlier printout. Coefficient order
# follows x_exo_joint: employment, capital, employment lead, capital lead.
# Assumes the 'cov' entry is the covariance matrix behind the reported
# standard errors - confirm in lm.estimate.
t_exo_joint = np.ravel(exo_test_joint['b_hat']) / np.sqrt(np.diag(exo_test_joint['cov']))
p_value_empl, p_value_cap, p_value_lead_empl, p_value_lead_cap = 2 * norm.sf(np.abs(t_exo_joint))
print(f"P-value of log of employment: {p_value_empl:.4f}")
print(f"P-value of log of capital stock: {p_value_cap:.4f}")
print(f"P-value of lead of log of employment: {p_value_lead_empl:.4f}")
print(f"P-value of lead of log of capital: {p_value_lead_cap:.4f}")

Testing the joint significance of the two leads

In [None]:
# Restricted model: FE without the leads, estimated on the SAME sample as the
# unrestricted model (last year dropped, T-1 periods per firm). Taking the SSR
# from fe_result instead would compare regressions on different numbers of
# observations and invalidate the F-statistic.
Q_exo = np.eye(T-1) - np.tile(1/(T-1), ((T-1), (T-1)))  # demeaning matrix for T-1 periods
fe_restricted = lm.estimate(lm.perm(Q_exo, y_exo), lm.perm(Q_exo, x_exo),
                            T=T-1, transform='fe', robust_se='True')

# Get the sum of squared residuals
RSS_fe = fe_restricted['SSR']
RSS_felead = exo_test_joint['SSR']

# Number of restrictions (both lead coefficients equal zero)
q = 2

# Number of parameters in unrestricted model
par = xw_exo_joint.shape[1]

# Degrees of freedom in the unrestricted model: N*(T-1) observations, minus
# N for the demeaning, minus the slope parameters
df = N*(T-1) - N - par

# Compute the F-statistic
# NOTE(review): an SSR-based F-test does not use the robust covariance that the
# coefficient tables rely on - consider lm.wald_test on the lead coefficients.
F_stat = ((RSS_fe - RSS_felead) / q) / (RSS_felead / df)
crit_val = f.ppf(0.95, q, df)
p_value = f.sf(F_stat.item(), q, df)

print(f"F-statistic: {F_stat.item():.4f}")
print(f"Critical value: {crit_val:.4f}")
print(f"P-value: {p_value:.4f}")

#### Testing FD.1

In [None]:
# First-differenced regressors, kept as column vectors
l_delta = x_diff[:, [0]]
k_delta = x_diff[:, [1]]

# Levels with every T-th row (0, T, 2T, ...) removed so they have as many rows
# as the differenced data; presumably that row is each firm's first period -
# verify that rows are stacked firm by firm.
l_level = l[np.arange(l.shape[0]) % T != 0]
k_level = k[np.arange(k.shape[0]) % T != 0]

In [None]:
# Stacking in X_delta: differenced regressors plus the employment level
x_delta_l = np.column_stack((l_delta, k_delta, l_level))

# Estimate the regression by OLS
# NOTE(review): robust_se is passed as the string 'True', not the bool True -
# confirm how lm.estimate interprets it before changing it.
exo_l = lm.estimate(y=y_diff, x=x_delta_l, transform='', T=T-1, robust_se='True')

# Print results
label_exo_l = label_x + ['Employment level']
lm.print_table((label_y, label_exo_l), exo_l, title='Exogeneity FD test', floatfmt='.4f')

# Two-sided p-values against the standard normal, computed from the estimates
# instead of t-values typed in from an earlier printout. Coefficient order
# follows x_delta_l: d.employment, d.capital, employment level. Assumes the
# 'cov' entry is the covariance matrix behind the reported standard errors -
# confirm in lm.estimate.
t_exo_l = np.ravel(exo_l['b_hat']) / np.sqrt(np.diag(exo_l['cov']))
p_value_empl, p_value_cap, p_value_level_empl = 2 * norm.sf(np.abs(t_exo_l))
print(f"P-value of log of employment: {p_value_empl:.4f}")
print(f"P-value of log of capital stock: {p_value_cap:.4f}")
print(f"P-value of level of log of employment: {p_value_level_empl:.4f}")
 

In [None]:
# Stacking in X_delta: differenced regressors plus the capital level
x_delta_k = np.column_stack((l_delta, k_delta, k_level))

# Estimate the regression by OLS
# NOTE(review): robust_se is passed as the string 'True', not the bool True -
# confirm how lm.estimate interprets it before changing it.
exo_k = lm.estimate(y=y_diff, x=x_delta_k, transform='', T=T-1, robust_se='True')

# Print results
label_exo_k = label_x + ['Capital level']
lm.print_table((label_y, label_exo_k), exo_k, title='Exogeneity FD test', floatfmt='.4f')

# Two-sided p-values against the standard normal, computed from the estimates
# instead of t-values typed in from an earlier printout. Coefficient order
# follows x_delta_k: d.employment, d.capital, capital level. Assumes the 'cov'
# entry is the covariance matrix behind the reported standard errors - confirm
# in lm.estimate.
t_exo_k = np.ravel(exo_k['b_hat']) / np.sqrt(np.diag(exo_k['cov']))
p_value_empl, p_value_cap, p_value_level_cap = 2 * norm.sf(np.abs(t_exo_k))
print(f"P-value of log of employment: {p_value_empl:.4f}")
print(f"P-value of log of capital stock: {p_value_cap:.4f}")
print(f"P-value of level of log of capital: {p_value_level_cap:.4f}")
 

In [None]:
# Stacking in X_delta: differenced regressors plus both levels
x_delta_joint = np.column_stack((l_delta, k_delta, l_level, k_level))

# Estimate the regression by OLS
# NOTE(review): robust_se is passed as the string 'True', not the bool True -
# confirm how lm.estimate interprets it before changing it.
exo_joint_fd = lm.estimate(y=y_diff, x=x_delta_joint, transform='', T=T-1, robust_se='True')

# Print results
label_exo_joint_fd = label_x + ['Employment level'] + ['Capital level']
lm.print_table((label_y, label_exo_joint_fd), exo_joint_fd, title='Exogeneity FD test', floatfmt='.4f')

# Two-sided p-values against the standard normal, computed from the estimates
# instead of t-values typed in from an earlier printout. Coefficient order
# follows x_delta_joint: d.employment, d.capital, employment level, capital
# level. Assumes the 'cov' entry is the covariance matrix behind the reported
# standard errors - confirm in lm.estimate.
t_exo_joint_fd = np.ravel(exo_joint_fd['b_hat']) / np.sqrt(np.diag(exo_joint_fd['cov']))
p_value_empl, p_value_cap, p_value_level_empl, p_value_level_cap = 2 * norm.sf(np.abs(t_exo_joint_fd))
print(f"P-value of log of employment: {p_value_empl:.4f}")
print(f"P-value of log of capital stock: {p_value_cap:.4f}")
print(f"P-value of level of log of employment: {p_value_level_empl:.4f}")
print(f"P-value of level of log of capital: {p_value_level_cap:.4f}")


Testing the joint significance of the two levels

In [None]:
# SSR of the restricted (plain FD) and unrestricted (FD plus levels) regressions
RSS_fd, RSS_fdlevel = fd_result['SSR'], exo_joint_fd['SSR']

# Two restrictions are tested
q = 2

# Regressor count of the unrestricted model
par = x_delta_joint.shape[1]

# Residual degrees of freedom of the unrestricted model
df = N*(T-1) - par

# F = (drop in SSR per restriction) / (unrestricted SSR per degree of freedom)
ssr_drop_per_restriction = (RSS_fd - RSS_fdlevel) / q
ssr_per_df = RSS_fdlevel / df
F_stat = ssr_drop_per_restriction / ssr_per_df

# 5% critical value and p-value from the F(q, df) distribution
crit_val = f.ppf(0.95, q, df)
p_value = 1 - f.cdf(F_stat.item(), q, df)

for caption, number in (("F-statistic", F_stat.item()),
                        ("Critical value", crit_val),
                        ("P-value", p_value)):
    print(f"{caption}: {number:.4f}")

### Test for constant returns to scale

#### FD model

In [None]:
# H0 (constant returns to scale): the two FD coefficients sum to one,
# written in the linear form R b = r
R = np.array([[1, 1]])
r = np.array([[1]])

# Point estimates and their covariance matrix from the FD regression
b_hat = fd_result['b_hat']
cov = fd_result['cov']

# Wald statistic, critical value and p-value as returned by lm.wald_test
w_stat, crit_val, p_value = lm.wald_test(b_hat, cov, R, r)

print(f'The test statistic is {w_stat.item():.2f}.')
print(f'The critical value at a 5% significance level is {crit_val:.2f}.')
print(f'The p-value is {p_value:.8f}.')

# Reject H0 when the statistic exceeds the critical value
verdict = (
    f"Reject null hypothesis: We reject CRS for the FD-estimation - P-value of: {p_value:.4f}."
    if w_stat > crit_val
    else f"Fail to reject null hypothesis: We cannot reject CRS for the FD-estimation. P-value of: {p_value:.4f}."
)
print(verdict)