In [1]:
%%capture
import stata_setup, os
if os.name == 'nt':
    stata_setup.config('C:/Program Files/Stata17/','mp')
else:
    stata_setup.config('/usr/local/stata17','mp')

## Data Preparation

In [2]:
%%stata -qui

* Load the analysis dataset (path is relative to the working directory).
use "../data/data", clear

* Expand year and cluster into one indicator variable per distinct value,
* named with the y_ and c_ prefixes.
quietly tabulate year, generate(y_)
quietly tabulate cluster, generate(c_)

* Generate the Flesch-Kincaid Grade Level indicator: 1 when the grade level is
* at or above its 90th percentile, 0 otherwise.
* Stata orders missing values above every number, so without the if-qualifier
* an observation with a missing grade level would satisfy the comparison and
* be coded 1. Such observations are left missing instead.
summarize flesch_kincaid_grade_level, detail
local cutoff = r(p90)
gen int FKG_01 = (flesch_kincaid_grade_level >= `cutoff') ///
    if !missing(flesch_kincaid_grade_level)

* Generate the excess number of sentences count: sentences beyond the 5th
* percentile of num_sentences, floored at zero. Missing values stay missing
* because missing is not less than zero.
summarize num_sentences, detail
local cutoff_sentences = r(p5)
gen int excess_sentences = num_sentences - `cutoff_sentences'
replace excess_sentences = 0 if excess_sentences < 0

* Journal indicator variables. No aer indicator is listed; the original note
* reads AER based category, so AER is presumably the reference journal.
local journals ecm jpe qje res

* JEL category variables a_imp through z_imp. d_imp is not listed; the
* original note reads D JEL based case, so D is presumably the reference.
local jel_imp a_imp b_imp c_imp e_imp f_imp g_imp h_imp i_imp j_imp k_imp ///
    l_imp m_imp n_imp o_imp p_imp q_imp r_imp y_imp z_imp

* Hand every candidate covariate to vl set so it is sorted into the vl system
* lists (vldummy, vlcontinuous, ...). The year and cluster ranges start at 2,
* so y_1 and c_1 are not included.
vl set log_num_authors log_num_pages both_genders prop_women ///
    `journals' `jel_imp' y_2-y_20 c_2-c_215 jel_flag, ///
    dummy clear nonotes

* fe: the dummy-classified variables other than both_genders and jel_flag.
* mfe is its factor-variable form, used by the later estimation cells.
vl create fe = vldummy - (both_genders jel_flag)
vl substitute mfe = i.fe

* baseline: both_genders and jel_flag as factor variables plus the
* continuous-classified variables other than prop_women.
vl create controls_dummy = (both_genders jel_flag)
vl create controls = vlcontinuous - (prop_women)
vl substitute baseline = i.controls_dummy controls

vl rebuild




# Elastic Net - Logistic

In [3]:
%%stata -qui -eret steret
* Elastic net logit of the FKG_01 indicator on prop_women plus the baseline
* and mfe covariate lists, with the alpha() and lambda() settings shown and
* the iteration log suppressed.
elasticnet logit FKG_01 prop_women $baseline $mfe, ///
    alpha(0.99) lambda(0.02) nolog




In [4]:
# Print the coefficient table for the estimation results currently stored in Stata.
%stata ereturn display

------------------------------------------------------------------------------
      FKG_01 | Coefficient
-------------+----------------------------------------------------------------
         c_4 |
          0  |  -.3811678
          1  |   .3843454
             |
       _cons |  -1.816232
------------------------------------------------------------------------------


# Elastic Net - Poisson

In [5]:
%%stata -qui -eret steret
* Elastic net Poisson model of excess_sentences on prop_women plus the
* baseline and mfe covariate lists, with the alpha() and lambda() settings
* shown and the iteration log suppressed.
elasticnet poisson excess_sentences prop_women $baseline $mfe, ///
    alpha(0.7) lambda(.1) nolog




In [6]:
# Print the coefficient table for the estimation results currently stored in Stata.
%stata ereturn display

------------------------------------------------------------------------------
excess_sen~s | Coefficient
-------------+----------------------------------------------------------------
log_num_pa~s |   .3588161
             |
         ecm |
          0  |  -.1451147
          1  |   .1451147
             |
         qje |
          0  |  -.0868885
          1  |   .0868885
             |
         res |
          0  |  -.1660911
          1  |   .1660911
             |
       c_imp |
          0  |  -.0183949
          1  |   .0183949
             |
        c_86 |
          0  |  -.1177174
          1  |   .1177467
             |
       c_114 |
          0  |  -.2093475
          1  |   .2098763
             |
       c_136 |
          0  |  -.5765329
          1  |   .5765334
             |
       c_142 |
          0  |  -.0506581
          1  |   .0506593
             |
       c_174 |
          0  |  -.0596673
          1  |   .0596695
             |
       _cons |   .9339063
----------