In [1]:
from __future__ import division
import pandas as pd
import numpy as np   # numerical operations
import statsmodels.api as sm 
import statsmodels.formula.api as smf
import os
import matplotlib.pyplot as plt

In [2]:
!pip install linearmodels
import linearmodels as lm

from linearmodels import PanelOLS
from linearmodels import RandomEffects
from linearmodels import FirstDifferenceOLS



In [3]:
# Switch the working directory to the data folder so the CSV can be read by
# its bare file name, then load the panel data and preview the first rows.
# NOTE(review): the absolute path is machine-specific; the notebook will not
# run elsewhere without editing it.
data_dir = '/Users/minpan/Desktop/Data Analysis  Data'
os.chdir(data_dir)
d = pd.read_csv("panel-for-R (2).csv")
d.head()

Unnamed: 0,idnum,panelwave,ballot,form,formwt,oversamp,sample,panstat_2,panstat_3,mar1,...,wtpan12,wtpan123,wtpannr12,wtpannr123,xmarsex,xmovie,xnorcsiz,year,yearval,zodiac
0,9,1,3,2,1,1,9,1,1,5.0,...,0.414689,0.487828,0.435503,0.470575,1.0,2.0,1.0,2006.0,,9.0
1,9,2,3,2,1,1,9,1,1,5.0,...,0.414689,0.487828,0.435503,0.470575,1.0,1.0,1.0,2008.0,,9.0
2,9,3,3,2,1,1,9,1,1,1.0,...,0.414689,0.487828,0.435503,0.470575,1.0,1.0,1.0,2010.0,,9.0
3,10,1,1,1,1,1,9,1,1,5.0,...,0.829377,0.858741,0.766632,0.828371,1.0,,1.0,2006.0,,3.0
4,10,2,1,1,1,1,9,1,1,5.0,...,0.829377,0.858741,0.766632,0.828371,1.0,,1.0,2008.0,,3.0


#### Variable of Interest

- abany: abortion if woman wants for any reason

Questions associated with this variable:
Please tell me whether or not you think it should be possible for a pregnant woman to obtain a legal abortion if the woman wants it for any reason?

0:Inapplicable
1:Yes
2:No
8:Don't know
9:No answer

- partyid: Generally speaking, do you usually think of yourself as a Republican, Democrat, Independent, or what?

-99	.n: No answer
-98	.d: Do not Know/Cannot Choose
0	Strong democrat
1	Not very strong democrat
2	Independent, close to democrat
3	Independent (neither, no response)
4	Independent, close to republican
5	Not very strong republican
6	Strong republican
7	Other party

- Degree: RESPONDENT'S DEGREE

0: Less than high school
1: High school
2: Associate/junior college
3: Bachelor's
4: Graduate 

-97	.s: Skipped on Web
-98	.d: Do not Know/Cannot Choose
-99	.n: No answer

In [4]:
# Narrow the data down to the variables of interest: the outcome ('abany'),
# the two predictors, and the panel identifiers. Rows with a missing value in
# any of these columns are discarded.
keep_cols = ['abany', 'partyid', 'degree', 'idnum', 'panelwave']
sub = d.loc[:, keep_cols].dropna()
sub.head()

Unnamed: 0,abany,partyid,degree,idnum,panelwave
0,1.0,0.0,3.0,9,1
1,1.0,0.0,3.0,9,2
2,1.0,0.0,3.0,9,3
3,1.0,1.0,4.0,10,1
4,2.0,0.0,4.0,10,2


In [5]:
# Explore variables: descriptive statistics for 'abany' in the full data set
summary = d.loc[:, 'abany'].describe()
print(summary)

count    3189.000000
mean        1.592662
std         0.491416
min         1.000000
25%         1.000000
50%         2.000000
75%         2.000000
max         2.000000
Name: abany, dtype: float64


In [6]:
# Descriptive statistics for 'partyid' in the full data set
summary2 = d.loc[:, 'partyid'].describe()
print(summary2)

count    4784.000000
mean        2.823370
std         2.073284
min         0.000000
25%         1.000000
50%         3.000000
75%         5.000000
max         7.000000
Name: partyid, dtype: float64


In [7]:
# Descriptive statistics for 'degree' in the full data set
summary3 = d.loc[:, 'degree'].describe()
print(summary3)

count    4811.000000
mean        1.648098
std         1.218126
min         0.000000
25%         1.000000
50%         1.000000
75%         3.000000
max         4.000000
Name: degree, dtype: float64


In [8]:
# Recode 'abany' into a binary outcome 'abn':
#   1 -> respondent supports abortion if a woman wants it for any reason
#   0 -> respondent does not support it
# Any other code falls through to NaN.
abany_codes = sub['abany']
conditions = [abany_codes.eq(1), abany_codes.eq(2)]
choices = [1, 0]
sub['abn'] = np.select(conditions, choices, default=np.nan)

In [9]:
# Tabulate the recoded outcome to verify the recoding
pd.crosstab(sub["abn"], "count")

col_0,count
abn,Unnamed: 1_level_1
0.0,1876
1.0,1296


In [10]:
# Recode 'partyid' to exclude "other party" affiliation: this analysis only
# examines the Democratic-to-Republican scale (codes 0 through 6). Values
# outside that range become NaN in the new column 'partyid1'.
sub['partyid1'] = sub['partyid'].where(sub['partyid'].between(0, 6))

# Tabulate the recoded variable to verify the recoding
pd.crosstab(sub["partyid1"], "count")

col_0,count
partyid1,Unnamed: 1_level_1
0.0,520
1.0,551
2.0,376
3.0,549
4.0,251
5.0,458
6.0,395


# Naive ("pooled") OLS regression on the panel data

In [11]:
# Pooled OLS of the binary outcome on party identification and degree.
# The specification is built first and then fitted; the full summary table
# is printed.
pooled_spec = smf.ols('abn ~ partyid1 + degree', data=sub)
lm_ols = pooled_spec.fit()
print(lm_ols.summary())

                            OLS Regression Results                            
Dep. Variable:                    abn   R-squared:                       0.091
Model:                            OLS   Adj. R-squared:                  0.091
Method:                 Least Squares   F-statistic:                     155.2
Date:                Tue, 12 Dec 2023   Prob (F-statistic):           5.89e-65
Time:                        20:19:07   Log-Likelihood:                -2042.7
No. Observations:                3100   AIC:                             4091.
Df Residuals:                    3097   BIC:                             4110.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.4123      0.018     22.865      0.0

##### Data Analysis:

I formulated hypotheses based on the expectation that individuals affiliated with the Democratic party are more inclined to support a woman's right to obtain an abortion for any reason, given the historical stance of the Democratic party on abortion rights. Additionally, I anticipated that individuals with higher educational degrees might be more likely to support abortion, assuming that those with higher education levels are more likely to endorse women's autonomy over their bodies.

The regression output shows that a one-unit increase on the party-identification scale (moving from Democratic toward Republican) is associated with a 5.4 percentage-point decrease in the probability that a respondent supports abortion if a woman wants it for any reason, all else being constant.

A one-unit increase in the respondent's educational degree is associated with an 8.6 percentage-point increase in the probability of supporting abortion if a woman wants it for any reason, holding other variables constant, which indicates that higher educational levels are associated with increased support for abortion.

Both coefficients are statistically significant, with p-values reported as 0.000 (i.e., p < 0.001).

The R-squared value (0.091) suggests that the model explains 9.1% of the variance in opinions towards abortion. 

In conclusion, the pooled regression results are consistent with both of my hypotheses.

# First Differences Regression 

In [12]:
# Index the data by respondent ('idnum') and survey wave ('panelwave').
# The guard makes the cell safe to re-run: once the two columns have been
# moved into the index, calling set_index again would raise a KeyError.
panel_keys = ["idnum", "panelwave"]
if list(sub.index.names) != panel_keys:
    sub = sub.set_index(panel_keys)
sub

Unnamed: 0_level_0,Unnamed: 1_level_0,abany,partyid,degree,abn,partyid1
idnum,panelwave,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
9,1,1.0,0.0,3.0,1.0,0.0
9,2,1.0,0.0,3.0,1.0,0.0
9,3,1.0,0.0,3.0,1.0,0.0
10,1,1.0,1.0,4.0,1.0,1.0
10,2,2.0,0.0,4.0,0.0,0.0
...,...,...,...,...,...,...
4500,1,2.0,6.0,1.0,0.0,6.0
4500,2,2.0,6.0,1.0,0.0,6.0
4500,3,2.0,6.0,1.0,0.0,6.0
4505,1,2.0,3.0,0.0,0.0,3.0


In [16]:
# First-difference regression of 'abn' on 'partyid1' and 'degree'.
# Rows with any missing value are dropped before the model is built, and the
# covariance estimator is clustered by entity.
sub = sub.dropna()
exog_vars = ["partyid1", "degree"]
exog = sub.loc[:, exog_vars]
lm_fd = FirstDifferenceOLS(dependent=sub["abn"], exog=exog)
abn_fd = lm_fd.fit(cov_type='clustered', cluster_entity=True)
print(abn_fd)

                     FirstDifferenceOLS Estimation Summary                      
Dep. Variable:                    abn   R-squared:                        0.0026
Estimator:         FirstDifferenceOLS   R-squared (Between):             -0.2193
No. Observations:                1757   R-squared (Within):               0.0007
Date:                Tue, Dec 12 2023   R-squared (Overall):             -0.1952
Time:                        20:20:24   Log-likelihood                   -1019.0
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      2.3123
Entities:                        1330   P-value                           0.0993
Avg Obs:                       2.3308   Distribution:                  F(2,1755)
Min Obs:                       1.0000                                           
Max Obs:                       3.0000   F-statistic (robust):             2.5361
                            

##### Data Analysis: 

The R-squared shows that the model explains only about 0.26% of the variance in the differenced 'abn' variable. This suggests that changes in 'partyid1' and 'degree' over time do not strongly predict changes in abortion support.

A one-unit shift from Democratic toward Republican affiliation is associated with a 1.4 percentage-point decrease in the likelihood of supporting abortion for any reason. This finding aligns with my expectation that political affiliation influences abortion views, but the effect size is small. The p-value for 'partyid1' is 0.0867, which is marginally above the conventional threshold of 0.05. This indicates that while there is some evidence of a relationship, it is not strong enough to be considered statistically significant.

Contrary to the OLS regression results and my initial hypotheses, the coefficient for 'degree' suggests a negative association between changes in education and support for abortion. However, this relationship is not statistically significant, given that the p-value is 0.1474.

This result might suggest that within-individual changes in education level over time are not a strong predictor of changes in views on abortion, or it could reflect the influence of unobserved confounding factors.

In [14]:
# Log-transformation of the variables: each source column gets a companion
# 'log_<name>' column holding log(value + 1); the +1 shift keeps zero-valued
# codes defined under the logarithm.
for col in ('abn', 'partyid1', 'degree'):
    sub['log_' + col] = np.log(sub[col] + 1)

In [17]:
# First-difference regression on the log-transformed variables, with the
# covariance estimator clustered by entity.
exog_vars = ["log_partyid1", "log_degree"]
exog = sub.loc[:, exog_vars]
lm_fd = FirstDifferenceOLS(dependent=sub["log_abn"], exog=exog)
abn_fd = lm_fd.fit(cov_type='clustered', cluster_entity=True)
print(abn_fd)

                     FirstDifferenceOLS Estimation Summary                      
Dep. Variable:                log_abn   R-squared:                        0.0037
Estimator:         FirstDifferenceOLS   R-squared (Between):             -0.3169
No. Observations:                1757   R-squared (Within):               0.0012
Date:                Tue, Dec 12 2023   R-squared (Overall):             -0.2807
Time:                        20:20:31   Log-likelihood                   -374.13
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      3.2237
Entities:                        1330   P-value                           0.0400
Avg Obs:                       2.3308   Distribution:                  F(2,1755)
Min Obs:                       1.0000                                           
Max Obs:                       3.0000   F-statistic (robust):             2.9458
                            

##### Data Analysis: 

The model reveals a statistically significant relationship between changes in political affiliation and the likelihood of supporting abortion. Specifically, a one-unit increase in the log-transformed 'partyid1', reflecting a shift towards Republican affiliation, is associated with a 3.5% decrease in the log-transformed likelihood of supporting abortion. Note that because 'abn' is binary, log(abn + 1) takes only two values (0 and ln 2), so the transformed outcome is a rescaling of 'abn' and the coefficient should be interpreted with caution. This finding underscores the influential role of political ideology in shaping views on abortion, aligning with general expectations about political affiliations and social attitudes. It may also suggest that the effect of political affiliation changes on abortion views is more multiplicative or exponential than linear.

The analysis does not find a statistically significant relationship between changes in educational level and the likelihood of supporting abortion. 