## Part (a) ##

In [1]:
import numpy as np
import pandas as pd

import scipy as sp
import scipy.stats
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.sandbox.regression.gmm import IV2SLS
from linearmodels.panel import PanelOLS

from sklearn.preprocessing import PolynomialFeatures

fish_data = pd.read_stata("D:/UCL MSc/ECON0072 Empirical Industrial Organization/Problem Set/FISH.dta")

In [2]:
fish_data.head()

Unnamed: 0,prca,prcw,qtya,qtyw,mon,tues,wed,thurs,speed2,wave2,speed3,wave3,avgprc,totqty,lavgprc,ltotqty,t,lavgp_1,gavgprc,gavgp_1
0,0.622222,0.766667,1875,2205,1,0,0,0,15,7.5,20,9.0,0.700286,4080,-0.356266,8.313852,1,,,
1,0.972222,1.175,2900,566,0,0,1,0,10,5.0,20,7.5,1.005336,3466,0.005322,8.150757,2,-0.356266,0.361588,
2,1.233333,1.475,770,1525,0,0,0,1,10,6.0,20,4.0,1.393918,2295,0.332118,7.738488,3,0.005322,0.326797,0.361588
3,1.928571,1.625,927,943,0,0,0,0,15,6.0,20,5.0,1.775487,1870,0.574075,7.533694,4,0.332118,0.241956,0.326797
4,0.803125,0.864286,4220,2665,1,0,0,0,10,3.5,20,3.5,0.826799,6885,-0.190194,8.8371,5,0.574075,-0.764269,0.241956


In [3]:
fish_data.dropna(subset=["ltotqty"], inplace=True)
fish_data.dropna(subset=["lavgprc"], inplace=True)
fish_data.dropna(subset=["mon"], inplace=True)
fish_data.dropna(subset=["tues"], inplace=True)
fish_data.dropna(subset=["wed"], inplace=True)
fish_data.dropna(subset=["thurs"], inplace=True)

In [4]:
model_expr = "ltotqty~lavgprc+mon+tues+wed+thurs"
model_ols = smf.ols(formula=model_expr, data=fish_data)
model_ols_results = model_ols.fit()
print(model_ols_results.summary())

                            OLS Regression Results                            
Dep. Variable:                ltotqty   R-squared:                       0.217
Model:                            OLS   Adj. R-squared:                  0.174
Method:                 Least Squares   F-statistic:                     5.039
Date:                Tue, 05 Mar 2024   Prob (F-statistic):           0.000403
Time:                        01:58:07   Log-Likelihood:                -99.253
No. Observations:                  97   AIC:                             210.5
Df Residuals:                      91   BIC:                             226.0
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      8.2443      0.163     50.637      0.0

## Part (b) ##

In [5]:
fish_data["log_speed2"] = np.log(fish_data["speed2"])
fish_data["log_wave2"] = np.log(fish_data["wave2"])
fish_data["log_speed3"] = np.log(fish_data["speed3"])
fish_data["log_wave3"] = np.log(fish_data["wave3"])
fish_data.head()

Unnamed: 0,prca,prcw,qtya,qtyw,mon,tues,wed,thurs,speed2,wave2,...,lavgprc,ltotqty,t,lavgp_1,gavgprc,gavgp_1,log_speed2,log_wave2,log_speed3,log_wave3
0,0.622222,0.766667,1875,2205,1,0,0,0,15,7.5,...,-0.356266,8.313852,1,,,,2.708984,2.014903,2.996094,2.197225
1,0.972222,1.175,2900,566,0,0,1,0,10,5.0,...,0.005322,8.150757,2,-0.356266,0.361588,,2.302734,1.609438,2.996094,2.014903
2,1.233333,1.475,770,1525,0,0,0,1,10,6.0,...,0.332118,7.738488,3,0.005322,0.326797,0.361588,2.302734,1.791759,2.996094,1.386294
3,1.928571,1.625,927,943,0,0,0,0,15,6.0,...,0.574075,7.533694,4,0.332118,0.241956,0.326797,2.708984,1.791759,2.996094,1.609438
4,0.803125,0.864286,4220,2665,1,0,0,0,10,3.5,...,-0.190194,8.8371,5,0.574075,-0.764269,0.241956,2.302734,1.252763,2.996094,1.252763


In [6]:
# First Stage
model_expr = "lavgprc~mon+tues+wed+thurs+log_speed2+log_wave2+log_speed3+log_wave3"
model_fs = smf.ols(formula=model_expr, data=fish_data)
model_fs_results = model_fs.fit()
print(model_fs_results.summary())

                            OLS Regression Results                            
Dep. Variable:                lavgprc   R-squared:                       0.311
Model:                            OLS   Adj. R-squared:                  0.248
Method:                 Least Squares   F-statistic:                     4.958
Date:                Tue, 05 Mar 2024   Prob (F-statistic):           4.39e-05
Time:                        01:58:07   Log-Likelihood:                -31.312
No. Observations:                  97   AIC:                             80.62
Df Residuals:                      88   BIC:                             103.8
Df Model:                           8                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -1.7254      0.448     -3.848      0.0

In [7]:
hypotheses = '(log_speed2 = log_wave2 = log_speed3 = log_wave3 = 0)'
f_test = model_fs_results.f_test(hypotheses)
print(f_test)

<F test: F=array([[9.63613995]]), p=1.6479718029228237e-06, df_denom=88, df_num=4>


## Part (c) ##

In [8]:
fish_data["fitted_values_lavgprc"] = model_fs_results.fittedvalues
print(fish_data["fitted_values_lavgprc"])

0     0.112454
1    -0.085467
2    -0.087801
3    -0.151985
4    -0.518209
        ...   
92   -0.422940
93   -0.358662
94   -0.205636
95   -0.068456
96    0.048622
Name: fitted_values_lavgprc, Length: 97, dtype: float64


In [9]:
# Second Stage
iv2sls_model = IV2SLS(endog=fish_data['ltotqty'], exog=fish_data[['fitted_values_lavgprc','mon','tues','wed','thurs']], instrument=fish_data[['log_speed2','log_wave2','log_speed3',"log_wave3"]])
iv2sls_model_results = iv2sls_model.fit()
print(iv2sls_model_results.summary())

                          IV2SLS Regression Results                           
Dep. Variable:                ltotqty   R-squared:                     -21.702
Model:                         IV2SLS   Adj. R-squared:                -22.936
Method:                     Two Stage   F-statistic:                       nan
                        Least Squares   Prob (F-statistic):                nan
Date:                Tue, 05 Mar 2024                                         
Time:                        01:58:07                                         
No. Observations:                  97                                         
Df Residuals:                      92                                         
Df Model:                           5                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
fitted_values_lavgprc     6.93