In [80]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from tabulate import tabulate
from statsmodels.stats.outliers_influence import variance_inflation_factor
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from stargazer.stargazer import Stargazer
import scipy.stats as stats
import seaborn as sns
import os

In [81]:
absolute_path = "C:\\Users\\tykun\\\OneDrive\\Documents\\SchoolDocs\VSCodeProjects\\connectedData\\board_analysis\\"
final_scripts = "final_scripts\\"
regression = "regression\\"
regression_path =  f"{absolute_path}{final_scripts}{regression}"
university_stats_path = os.path.join(regression_path, "regression_stats", "regression_university_board_statistics.csv")


In [82]:
regression_data = pd.read_csv(university_stats_path)
regression_data["Year"].astype(int, errors="raise")
print(regression_data.columns)

Index(['Year', 'Institution', 'AffiliationId', 'female_president',
       'PrimarySample', 'total_members', 'total_ethnicity', 'board_turnover',
       'carnegie_id', 'state', 'control', 'StateSystem', 'region',
       'num_billionaires', 'student.women', 'faculty.women',
       'faculty.race_ethnicity.white', 'student.size',
       'cost.tuition.out_of_state', 'school.faculty_salary', 'RD_expenditure',
       'female_proportion', 'poc_proportion', 'billionaire_proportion',
       'eigenvector', 'betweenness', 'degree', 'strength', 'clustering',
       'Rank'],
      dtype='object')


In [83]:
continuous_vars = [
    "student.women", 
    "student.size", 
    "female_proportion",
    "billionaire_proportion",
    "total_members",
    "betweenness",
    "degree",
    "faculty.race_ethnicity.white",
    "poc_proportion",
    "board_turnover",
    "faculty.women",
    "strength",
    "cost.tuition.out_of_state",
    "school.faculty_salary",
    "RD_expenditure",
    "clustering",
]

year_var = "Year"

regression_data = pd.get_dummies(
    regression_data,
    columns=[year_var, "control", "Rank"],
    drop_first=True
)

year_dummies = [col for col in regression_data.columns if col.startswith(f"{year_var}_")]
control_dummies = [col for col in regression_data.columns if col.startswith("control_")]
rank_dummies = [col for col in regression_data.columns if col.startswith("Rank_")]

for dummy in year_dummies + control_dummies + rank_dummies:
    regression_data[dummy] = regression_data[dummy].astype(int)

dependent_var = "female_president"
year_var = "Year"

regression_data[dependent_var] = regression_data[dependent_var].astype(int)

df = regression_data[[dependent_var] + continuous_vars + year_dummies + control_dummies + rank_dummies]

for c in continuous_vars:
    df[c] = (df[c] - df[c].mean())/df[c].std()

student=["student.women", "student.size", "cost.tuition.out_of_state",]
faculty=[ "faculty.women", "faculty.race_ethnicity.white", "school.faculty_salary", "RD_expenditure",]
board=["female_proportion", "billionaire_proportion","total_members","poc_proportion", "board_turnover",]
network=["betweenness","degree", "strength", "clustering"]


def logit_regression(df, dependent_var,independent_vars):
    df1=df[[dependent_var]+independent_vars].dropna().copy()
    df1 = sm.add_constant(df1,prepend=False)  
    X1=df1.iloc[:,1:]
    Y1 = df1[dependent_var]
    model = sm.Logit(Y1, X1).fit()

    return model


m1=logit_regression(df, dependent_var,student+control_dummies+year_dummies+rank_dummies)
m2=logit_regression(df, dependent_var,faculty+control_dummies+year_dummies+rank_dummies)
m3=logit_regression(df, dependent_var,board+control_dummies+year_dummies+rank_dummies)
m4=logit_regression(df, dependent_var,network+control_dummies+year_dummies+rank_dummies)
m5=logit_regression(df, dependent_var,student+faculty+board+network+control_dummies+year_dummies+rank_dummies)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[c] = (df[c] - df[c].mean())/df[c].std()


Optimization terminated successfully.
         Current function value: 0.271288
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.262908
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.265766
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.269508
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.250075
         Iterations 9


In [84]:
df

Unnamed: 0,female_president,student.women,student.size,female_proportion,billionaire_proportion,total_members,betweenness,degree,faculty.race_ethnicity.white,poc_proportion,...,Year_2009,Year_2010,Year_2011,Year_2013,Year_2018,control_Public,Rank_101,Rank_11 to 30,Rank_31 to 80,Rank_81 to 100
0,0,1.027532,-1.200190,1.598661,-0.180435,-0.117737,-0.339486,-0.825165,0.819047,-0.511012,...,0,0,0,0,0,0,1,0,0,0
1,0,0.035834,1.456545,1.834287,-0.180435,-0.930854,-0.339486,-0.146376,0.467054,-0.953135,...,0,0,0,0,0,1,1,0,0,0
2,0,0.105990,0.486293,-0.089986,-0.180435,-1.083313,-0.339486,-0.825165,1.092200,0.422358,...,0,0,0,0,0,1,1,0,0,0
3,0,0.669617,-0.000534,-1.464467,-0.180435,0.288821,-0.339486,0.532413,1.566036,0.766231,...,0,0,0,0,0,0,1,0,0,0
4,0,0.040591,-0.188845,-0.726588,-0.180435,0.390461,-0.339486,-0.825165,1.277752,-0.953135,...,0,0,0,0,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1785,0,-0.494498,0.825130,-0.969654,-0.180435,-0.778394,-0.339486,-0.825165,0.087189,-0.127839,...,0,0,0,0,1,1,1,0,0,0
1786,0,-0.449313,0.609164,0.538348,-0.180435,-1.184953,-0.339486,-0.825165,-0.225782,-0.953135,...,0,0,0,0,1,1,1,0,0,0
1787,1,-1.680016,-1.393855,0.782098,-0.180435,-0.066917,-0.339486,-0.146376,-0.828629,-0.099381,...,0,0,0,0,1,0,1,0,0,0
1788,1,-0.045023,-0.170624,-0.639779,-0.180435,-1.235772,-0.339486,-0.825165,-0.097568,-0.953135,...,0,0,0,0,1,1,1,0,0,0


In [85]:
from stargazer.stargazer import Stargazer

stargazer = Stargazer([m1, m2,m3,m4,m5])

stargazer.title("Logistic Regression Results")
#stargazer.custom_columns(["Model 1" ], [1])
stargazer.show_model_numbers(True)
stargazer.covariate_order(student+faculty+board+network+control_dummies+year_dummies+rank_dummies)
stargazer.dependent_variable_name('Female president')


renames=['Female students proportion','Student size','Board female proportion','Board billionaire proportion',
                'Board size','Betweeness centrality','Degree','Faculty white proportion','Board ethnic minority proportion',
                 'Borad turnover','Female faculty proportion','Strength','Out of state tuition',
                 'Faculty salary','R&D expenditure','Clustering']
rename_dic=dict(zip(continuous_vars, renames))
rename_dic['control_Public']='Pulbic institution'
stargazer.rename_covariates(rename_dic)


latex_output=stargazer.render_latex()

with open("regression_table.tex", "w") as f:
    f.write(latex_output)

In [86]:
stargazer

0,1,2,3,4,5
,,,,,
,Dependent variable: Female president,Dependent variable: Female president,Dependent variable: Female president,Dependent variable: Female president,Dependent variable: Female president
,,,,,
,(1),(2),(3),(4),(5)
,,,,,
Female students proportion,-0.037,,,,-0.061
,(0.087),,,,(0.150)
Student size,0.009,,,,0.329**
,(0.132),,,,(0.161)
Out of state tuition,0.323,,,,0.291


In [87]:
vars=[var for var in df.columns.to_list() if not var.startswith('Year')]
df[vars].corr()

Unnamed: 0,female_president,student.women,student.size,female_proportion,billionaire_proportion,total_members,betweenness,degree,faculty.race_ethnicity.white,poc_proportion,...,strength,cost.tuition.out_of_state,school.faculty_salary,RD_expenditure,clustering,control_Public,Rank_101,Rank_11 to 30,Rank_31 to 80,Rank_81 to 100
female_president,1.0,-0.025503,-0.061481,0.061723,-0.023835,-0.01494,0.008581,0.018022,-0.042042,0.035362,...,-0.038712,0.106562,0.107882,-0.067741,-0.02691,-0.090881,-0.005824,-0.056555,-0.028549,0.046128
student.women,-0.025503,1.0,0.181779,0.169421,0.007158,-0.03141,-0.040634,-0.001609,-0.029043,0.059619,...,0.074672,-0.147208,-0.219946,-0.285205,-0.018546,0.060945,0.100006,-0.027102,-0.041077,-0.034458
student.size,-0.061481,0.181779,1.0,0.149129,-0.035112,-0.211901,-0.153717,-0.186658,0.037231,0.211978,...,0.018948,-0.246322,-0.050287,0.28044,-0.000705,0.684582,-0.091309,0.05112,0.162919,0.049258
female_proportion,0.061723,0.169421,0.149129,1.0,0.031796,-0.201502,-0.12748,-0.152094,-0.103046,0.163752,...,-0.054383,0.014054,0.038925,0.036727,-0.04662,0.132698,-0.075381,0.005834,0.038274,0.024407
billionaire_proportion,-0.023835,0.007158,-0.035112,0.031796,1.0,0.078392,0.116616,0.133457,-0.04506,0.068582,...,0.055855,0.12575,0.145811,0.095309,-0.018981,-0.122614,-0.077924,0.018923,0.024073,-0.007732
total_members,-0.01494,-0.03141,-0.211901,-0.201502,0.078392,1.0,0.357253,0.472062,-0.051055,-0.003235,...,0.240603,0.3501,0.351362,0.144013,0.15124,-0.42168,-0.269843,0.214557,0.1204,0.006631
betweenness,0.008581,-0.040634,-0.153717,-0.12748,0.116616,0.357253,1.0,0.70944,0.003288,-0.032158,...,0.230687,0.174279,0.176103,0.101101,-0.005842,-0.295619,-0.225893,0.094287,0.129999,0.002982
degree,0.018022,-0.001609,-0.186658,-0.152094,0.133457,0.472062,0.70944,1.0,-0.030635,-0.007454,...,0.577553,0.186147,0.211314,0.110229,0.200974,-0.333082,-0.277128,0.164334,0.135629,5e-06
faculty.race_ethnicity.white,-0.042042,-0.029043,0.037231,-0.103046,-0.04506,-0.051055,0.003288,-0.030635,1.0,-0.245343,...,-0.034864,-0.206944,-0.280261,-0.193807,0.068457,0.056231,0.124657,-0.072123,-0.027981,0.006904
poc_proportion,0.035362,0.059619,0.211978,0.163752,0.068582,-0.003235,-0.032158,-0.007454,-0.245343,1.0,...,0.095206,0.013843,0.131919,0.078153,-0.064848,0.099391,-0.135049,0.053995,0.088676,0.006683


In [88]:
df

Unnamed: 0,female_president,student.women,student.size,female_proportion,billionaire_proportion,total_members,betweenness,degree,faculty.race_ethnicity.white,poc_proportion,...,Year_2009,Year_2010,Year_2011,Year_2013,Year_2018,control_Public,Rank_101,Rank_11 to 30,Rank_31 to 80,Rank_81 to 100
0,0,1.027532,-1.200190,1.598661,-0.180435,-0.117737,-0.339486,-0.825165,0.819047,-0.511012,...,0,0,0,0,0,0,1,0,0,0
1,0,0.035834,1.456545,1.834287,-0.180435,-0.930854,-0.339486,-0.146376,0.467054,-0.953135,...,0,0,0,0,0,1,1,0,0,0
2,0,0.105990,0.486293,-0.089986,-0.180435,-1.083313,-0.339486,-0.825165,1.092200,0.422358,...,0,0,0,0,0,1,1,0,0,0
3,0,0.669617,-0.000534,-1.464467,-0.180435,0.288821,-0.339486,0.532413,1.566036,0.766231,...,0,0,0,0,0,0,1,0,0,0
4,0,0.040591,-0.188845,-0.726588,-0.180435,0.390461,-0.339486,-0.825165,1.277752,-0.953135,...,0,0,0,0,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1785,0,-0.494498,0.825130,-0.969654,-0.180435,-0.778394,-0.339486,-0.825165,0.087189,-0.127839,...,0,0,0,0,1,1,1,0,0,0
1786,0,-0.449313,0.609164,0.538348,-0.180435,-1.184953,-0.339486,-0.825165,-0.225782,-0.953135,...,0,0,0,0,1,1,1,0,0,0
1787,1,-1.680016,-1.393855,0.782098,-0.180435,-0.066917,-0.339486,-0.146376,-0.828629,-0.099381,...,0,0,0,0,1,0,1,0,0,0
1788,1,-0.045023,-0.170624,-0.639779,-0.180435,-1.235772,-0.339486,-0.825165,-0.097568,-0.953135,...,0,0,0,0,1,1,1,0,0,0


In [89]:
edges=pd.read_csv('data/network_data/yearly_interlocks/1999_network_interlock_edges.csv')

FileNotFoundError: [Errno 2] No such file or directory: 'data/network_data/yearly_interlocks/1999_network_interlock_edges.csv'

In [None]:
nodes=pd.read_csv('data/network_data/yearly_interlocks/1999_network_interlock_nodes.csv')

In [None]:
edges

Unnamed: 0,Id,Source,Target,Type,Weight,Year
0,e_1999_1,Case Western Reserve University,Columbia University,Undirected,0.013158,1999
1,e_1999_2,Chapman University,Cornell University,Undirected,0.011628,1999
2,e_1999_3,Clarkson University,Drexel University,Undirected,0.015625,1999
3,e_1999_4,Carnegie Mellon University,Duquesne University,Undirected,0.021898,1999
4,e_1999_5,Clark Atlanta University,Emory University,Undirected,0.043478,1999
...,...,...,...,...,...,...
111,e_1999_112,Tennessee Board of Regents,University of Tennessee System,Undirected,0.027027,1999
112,e_1999_113,Columbia University,Yeshiva University,Undirected,0.008264,1999
113,e_1999_114,Massachusetts Institute Of Technology,Yeshiva University,Undirected,0.006711,1999
114,e_1999_115,New York University,Yeshiva University,Undirected,0.005814,1999


In [None]:
nodes

Unnamed: 0,Id,Label,Interlock_Count,AffiliationId,Board_Size
0,Adelphi University,Adelphi University,1,71965598,17
1,American University,American University,0,181401687,28
2,Andrews University,Andrews University,4,102298084,36
3,Arizona Board of Regents,Arizona Board of Regents,1,203172682,13
4,Auburn University,Auburn University,3,82497590,12
...,...,...,...,...,...
147,Western Michigan University,Western Michigan University,0,141649380,8
148,Widener University,Widener University,1,138659443,26
149,Worcester Polytechnic Institute,Worcester Polytechnic Institute,1,107077323,38
150,Yale University,Yale University,2,32971472,19


In [None]:
df

Unnamed: 0,Institution,AffiliationId,female_president,PrimarySample,total_members,total_ethnicity,board_turnover,carnegie_id,state,StateSystem,...,clustering,Year_2000,Year_2005,Year_2007,Year_2008,Year_2009,Year_2011,Year_2013,Year_2018,control_Public
0,American University,181401687.0,0,True,28,28,0.000000,131159,DC,False,...,0.000000,0,0,0,0,0,0,0,0,0
1,Arizona State University,55732556.0,0,True,12,12,0.000000,104151,AZ,Arizona Board of Regents,...,0.000000,0,0,0,0,0,0,0,0,1
2,Ball State University,198089087.0,0,True,9,9,0.000000,150136,IN,False,...,0.000000,0,0,0,0,0,0,0,0,1
3,Baylor University,157394403.0,0,True,36,36,0.000000,223232,TX,False,...,0.027589,0,0,0,0,0,0,0,0,0
4,Boston College,103531236.0,0,True,38,38,0.000000,164924,MA,False,...,0.000000,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1606,West Virginia University,12097938.0,0,True,15,15,0.250000,238032,WV,False,...,0.000000,0,0,0,0,0,0,0,1,1
1607,Western Michigan University,141649380.0,0,True,7,7,0.293333,172699,MI,False,...,0.000000,0,0,0,0,0,0,0,1,1
1608,Worcester Polytechnic Institute,107077323.0,1,True,29,29,0.190164,168421,MA,False,...,0.000000,0,0,0,0,0,0,0,1,0
1609,Wright State University,19648265.0,1,True,6,6,0.350000,206604,OH,University System of Ohio,...,0.000000,0,0,0,0,0,0,0,1,1
