In [3]:
import pandas as pd
import statsmodels.api as sm
from scipy.stats import t

# Define the file path to your 'firms.csv' file
file_path = r'C:\Users\henrik\Downloads\firms.csv'

# Load the dataset
data = pd.read_csv(file_path)

# Filter the data to include only the first three years (1968, 1969, and 1970)
data = data[(data['year'] >= 1968) & (data['year'] <= 1970)]

# Create dummy variables for firm-specific effects
data['firm_dummies'] = pd.Categorical(data['firmid']).codes

# Define the dependent variable (log of deflated sales)
y = data['ldsa']

# Define the independent variables (log of adjusted capital stock and log of employment)
X = data[['lcap', 'lemp']]

# Add a constant term
X = sm.add_constant(X)

# Create a Fixed Effects model
fe_model = sm.OLS(y, X)

# Fit the Fixed Effects model
fe_results = fe_model.fit()

# Print the summary of the regression results
print(fe_results.summary())

# Extract the coefficients for lcap (βK) and lemp (βL) from the FE results
beta_k = fe_results.params['lcap']
beta_l = fe_results.params['lemp']

# Define the null hypothesis: βK + βL = 1
null_hypothesis = 1

# Calculate the test statistic
test_statistic = beta_k + beta_l - null_hypothesis

# Get the standard errors for βK and βL
std_err_k = fe_results.bse['lcap']
std_err_l = fe_results.bse['lemp']

# Calculate the standard error of the test statistic
std_err_test_statistic = (std_err_k ** 2 + std_err_l ** 2) ** 0.5

# Calculate the degrees of freedom
degrees_of_freedom = len(fe_results.params) - 1  # Number of coefficients minus 1

# Set the significance level (alpha)
alpha = 0.05

# Calculate the critical t-value
critical_t_value = t.ppf(1 - alpha / 2, degrees_of_freedom)

# Calculate the p-value
p_value = 2 * (1 - t.cdf(abs(test_statistic), degrees_of_freedom))

# Print the results
print(f'Test Statistic: {test_statistic:.4f}')
print(f'Critical t-value: {critical_t_value:.4f}')
print(f'P-value: {p_value:.4f}')

# Check if the null hypothesis is rejected
if abs(test_statistic) > critical_t_value:
    print('Reject the null hypothesis: Production does not exhibit constant returns to scale.')
else:
    print('Fail to reject the null hypothesis: Production exhibits constant returns to scale.')

                            OLS Regression Results                            
Dep. Variable:                   ldsa   R-squared:                       0.917
Model:                            OLS   Adj. R-squared:                  0.917
Method:                 Least Squares   F-statistic:                     7309.
Date:                Sun, 08 Oct 2023   Prob (F-statistic):               0.00
Time:                        20:43:58   Log-Likelihood:                -512.34
No. Observations:                1323   AIC:                             1031.
Df Residuals:                    1320   BIC:                             1046.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const       3.841e-08      0.010   3.92e-06      1.0