### Import necessary libraries, set options

In [None]:
import numpy as np
import os
import pandas as pd
import re

# Let pandas display up to 250 columns and 250 rows before truncating output.
pd.options.display.max_columns = 250
pd.options.display.max_rows = 250

### Read in dataset

In [None]:
# Directory that holds the raw export (placeholder path).
path_to_raw_data = "path/to/raw/data"

# Missing-value markers are pinned explicitly instead of relying on the
# pandas defaults. Newer pandas releases (2.0+, per the read_csv
# documentation) also treat the literal string "None" as missing, while the
# party recode later in this file expects "None" as a real answer label.
# The list below is the default marker set without "None".
na_markers = [
    "", "#N/A", "#N/A N/A", "#NA", "-1.#IND", "-1.#QNAN", "-NaN", "-nan",
    "1.#IND", "1.#QNAN", "<NA>", "N/A", "NA", "NULL", "NaN", "n/a", "nan",
    "null",
]

data = pd.read_csv(
    os.path.join(path_to_raw_data, "raw-data.csv"),
    low_memory=False,
    keep_default_na=False,
    na_values=na_markers,
)

# Placeholder column; every row starts out as NaN.
data["condition"] = np.nan
print(data.shape)
data.head()

### Remove unnecessary rows

In [None]:
# Drop the rows labelled 0 and 1 (presumably extra header rows of the
# export -- confirm), then keep only finished responses that are neither
# survey previews nor spam.
without_first_rows = data.drop(index=[0, 1])
is_preview = without_first_rows["Status"] == "Survey Preview"
is_spam = without_first_rows["Status"] == "Spam"
is_finished = without_first_rows["Finished"] == "True"
data2 = without_first_rows[~is_preview & ~is_spam & is_finished]

print(data2.shape)
data2.head()

In [None]:
# Display the last five rows of the filtered responses.
data2.tail(n=5)

### Verify respondents are unique

In [None]:
# Report how many distinct respondent ids there are next to the number of
# rows. The count is printed explicitly because a bare expression that is not
# the last statement of a cell is evaluated and then discarded.
# dropna=False keeps a missing rid counted as one value.
print(data2["rid"].nunique(dropna=False), "unique rid values in", len(data2), "rows")

# Rows whose rid repeats an earlier row (empty if every respondent is unique).
data2[data2.duplicated(subset = "rid")]

In [None]:
#data2[data2.rid == "XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX"]

In [None]:
#data2 = data2[data2['ResponseId'] != "X_XXXXXXXXXXXXXXX"]

### Remove respondents who said "No" to the consent question

In [None]:
# Keep only respondents whose consent answer is exactly the "Yes" option.
consent_yes = (
    "Yes, I would like to take part in this study, "
    "and confirm that I AM A US RESIDENT and am 18 or older"
)
gave_consent = data2["IRB Consent"] == consent_yes
data3 = data2[gave_consent]

In [None]:
# Renumber rows from 0 (discarding the old index) and keep an independent copy.
renumbered = data3.reset_index(drop=True)
data3 = renumbered.copy()

In [None]:
# Print (rows, columns) and display the first five rows.
n_rows, n_cols = data3.shape
print((n_rows, n_cols))
data3.head(n=5)

### Reorganize/rename columns, removing sensitive info along the way

In [None]:
# Column groups used further down to select and order the survey variables.
# Within every group the regular question codes come first, followed by their
# "R"-suffixed counterparts.

exp_main = ["condition", "FL_3_DO"]

mc = ["SA.1"]

# (scale prefix, number of items) pairs; each scale contributes its items
# 1..n and then the same items with an "R" suffix.
opinion = [
    f"{scale}.{item}{suffix}"
    for scale, n_items in [
        ("DS", 9), ("EO", 2), ("LHW", 2), ("ED", 2),
        ("ISP", 4), ("GT", 5), ("GMC", 3),
    ]
    for suffix in ("", "R")
    for item in range(1, n_items + 1)
]

# DQ1-DQ20, then the "R" versions that exist for DQ12-DQ20.
demog = [f"DQ{i}" for i in range(1, 21)] + [f"DQ{i}R" for i in range(12, 21)]

# PQ1-PQ6, then the "R" versions of PQ5 and PQ6.
parents = [f"PQ{i}" for i in range(1, 7)] + ["PQ5R", "PQ6R"]

# CQ1-CQ6, the free-text column of CQ6, and CQ7 -- first regular, then "R".
coronavirus = [
    name
    for suffix in ("", "R")
    for name in (
        *(f"CQ{i}{suffix}" for i in range(1, 7)),
        f"CQ6{suffix}_10_TEXT",
        f"CQ7{suffix}",
    )
]

In [None]:
# Auxiliary column groups: timing columns, display-order columns, the consent
# question, and metadata columns.

# Four timing metrics for each of six timer prefixes.
exp_aux = [
    f"{timer}_{metric}"
    for timer in ("TCOR", "TCORN", "TCORC", "TINT", "TNAT", "TCLA")
    for metric in ("First Click", "Last Click", "Page Submit", "Click Count")
]

# "_DO" columns: numbered flow columns, then one regular and one "Reverse"
# column per scale, then four question-level columns.
order = (
    [f"FL_{n}_DO" for n in (59, 60, 61, 70, 71, 72, 73, 74, 75, 76, 78, 83, 84)]
    + [
        scale + tail
        for tail in ("_DO", "_Reverse_DO")
        for scale in ("DS", "EO", "LHW", "ED", "ISP", "GT", "GMC")
    ]
    + ["GMC.3_DO", "GMC.3R_DO", "DQ18_DO", "DQ18R_DO"]
)

consent = ["IRB Consent"]

qualtrics_other = [
    "StartDate",
    "EndDate",
    "Status",
    "Progress",
    "Duration (in seconds)",
    "Finished",
    "RecordedDate",
    "RecipientLastName",
    "RecipientFirstName",
    "RecipientEmail",
    "ExternalReference",
    "DistributionChannel",
    "UserLanguage",
]

lucid_other = [
    "age",
    "gender",
    "hhi",
    "ethnicity",
    "hispanic",
    "education",
    "political_party",
    "region",
]

In [None]:
# Columns treated as sensitive; they are defined here but are not part of
# the column selections built from the other groups.
qualtrics_sensitive = "IPAddress ResponseId LocationLatitude LocationLongitude".split()

lucid_sensitive = "rid zip".split()

In [None]:
# Keep the analysis columns plus auxiliary/metadata columns, in this order.
# The sensitive column groups are not included.
data4_columns = (
    exp_main + mc + opinion + demog + parents + coronavirus + order
    + exp_aux + consent + qualtrics_other + lucid_other
)
data4 = data3[data4_columns]

In [None]:
# Display the first five rows of the selected columns.
data4.head(n=5)

In [None]:
# Narrow down to the experiment, survey-answer and display-order columns.
data5_columns = exp_main + mc + opinion + demog + parents + coronavirus + order
data5 = data4[data5_columns]

In [None]:
# Display the first five rows of the narrowed frame.
data5.head(n=5)

In [None]:
# Rename question codes to descriptive column names.

# Items that exist both as a plain code and as an "R"-suffixed code are
# listed once; the "R" code receives the same name with "_r" appended.
paired_items = {
    "DS.1": "wealthy_family_essential",
    "DS.2": "educated_parents_essential",
    "DS.3": "having_ambition_essential",
    "DS.4": "hard_work_essential",
    "DS.5": "talent_essential",
    "DS.6": "luck_essential",
    "DS.7": "connections_essential",
    "DS.8": "good_education_essential",
    "DS.9": "political_influence_essential",
    "EO.1": "enough_opportunities",
    "EO.2": "more_opportunities_than_parents",
    "LHW.1": "hard_work_brings_better_life",
    "LHW.2": "people_poor_because_laziness",
    "ED.1": "high_earners_deserve_high",
    "ED.2": "low_earners_deserve_low",
    "ISP.1": "ineq_serious_problem",
    "ISP.2": "poverty_serious_problem",
    "ISP.3": "unequal_hcare_serious_problem",
    "ISP.4": "ineq_increasing",
    "GT.1": "govt_regulation_effective",
    "GT.2": "govt_transfers_effective",
    "GT.3": "progressive_taxes_effective",
    "GT.4": "educ_policies_effective",
    "GT.5": "private_charity_effective",
    "GMC.1": "govt_should_decrease_ineq",
    "GMC.2": "companies_should_decrease_ineq",
    "GMC.3": "who_most_responsible",
    "DQ12": "household_income",
    "DQ13": "subj_relative_income",
    "DQ14": "income_volatile",
    "DQ15": "sol_better_than_past",
    "DQ16": "sol_better_in_future",
    "DQ17": "liberal",
    "DQ18": "political_party",
    "DQ19": "follow_news",
    "DQ20": "has_confidence_in_science",
    "PQ5": "subj_relative_income_16_yrs",
    "PQ6": "sol_better_than_parents",
    "CQ1": "coronavirus_serious_threat",
    "CQ2": "must_save_economy",
    "CQ3": "satisfied_with_city",
    "CQ4": "satisfied_with_state",
    "CQ5": "satisfied_with_federal_govt",
    "CQ6": "how_affected_by_coronavirus",
    "CQ7": "days_out_in_past_week",
}

# Codes without an "R" twin, plus the two free-text columns whose "R" sits in
# the middle of the code and therefore cannot be derived by appending.
single_items = {
    "FL_3_DO": "text_shown",
    "SA.1": "comprehension_q",
    "DQ1": "is_resident",
    "DQ2": "state",
    "DQ3": "gender",
    "DQ4": "age",
    "DQ5": "married",
    "DQ6": "has_children",
    "DQ7": "race",
    "DQ8": "religion",
    "DQ9": "education",
    "DQ10": "emp_status",
    "DQ11": "occupation",
    "PQ1": "father_occ",
    "PQ2": "mother_occ",
    "PQ3": "father_educ",
    "PQ4": "mother_educ",
    "CQ6_10_TEXT": "how_affected_by_coronavirus_other",
    "CQ6R_10_TEXT": "how_affected_by_coronavirus_other_r",
    "FL_59_DO": "perceptions_or_preferences_DO",
    "FL_60_DO": "perceptions_DO",
    "FL_61_DO": "preferences_DO",
    "FL_72_DO": "DS_regular_or_reverse",
    "FL_73_DO": "EO_regular_or_reverse",
    "FL_74_DO": "LHW_regular_or_reverse",
    "FL_75_DO": "ED_regular_or_reverse",
    "FL_76_DO": "ISP_regular_or_reverse",
    "FL_70_DO": "GT_regular_or_reverse",
    "FL_71_DO": "GMC_regular_or_reverse",
    "FL_83_DO": "Demog_end_regular_or_reverse",
    "FL_84_DO": "Parents_end_regular_or_reverse",
    "FL_78_DO": "Corona_regular_or_reverse",
}

# Columns that are listed in the mapping but keep their current name.
unchanged = [
    "DS_DO", "EO_DO", "LHW_DO", "ED_DO", "ISP_DO", "GT_DO", "GMC_DO",
    "DS_Reverse_DO", "EO_Reverse_DO", "LHW_Reverse_DO", "ED_Reverse_DO",
    "ISP_Reverse_DO", "GT_Reverse_DO", "GMC_Reverse_DO",
    "GMC.3_DO", "GMC.3R_DO", "DQ18_DO", "DQ18R_DO",
]

new_names = dict(single_items)
new_names.update({name: name for name in unchanged})
for code, name in paired_items.items():
    new_names[code] = name
    new_names[code + "R"] = name + "_r"

data6 = data5.rename(columns=new_names)

In [None]:
# Display the first five rows with the new column names.
data6.head(n=5)

### Recode values into numeric

In [None]:
# Recode on an independent (deep) copy so data6 keeps the original labels.
data7 = data6.copy(deep=True)

In [None]:
# Recode the nine *_essential items from answer labels to 1-5
# (1 = "Not important at all", 5 = "Essential"). Series.map turns any label
# that is not in the mapping into NaN.
importance_codes = {
    "Not important at all": 1,
    "Not very important": 2,
    "Fairly important": 3,
    "Very important": 4,
    "Essential": 5,
}
essential_items = [
    "wealthy_family_essential",
    "educated_parents_essential",
    "having_ambition_essential",
    "hard_work_essential",
    "talent_essential",
    "luck_essential",
    "connections_essential",
    "good_education_essential",
    "political_influence_essential",
]
for item in essential_items:
    data7[item] = data7[item].map(importance_codes)

data7.head(3)

In [None]:
# Recode the nine *_essential_r items with the same 1-5 importance coding
# used for the plain items (5 = "Essential"); unmatched labels become NaN.
importance_codes = {
    "Not important at all": 1,
    "Not very important": 2,
    "Fairly important": 3,
    "Very important": 4,
    "Essential": 5,
}
essential_items_r = [
    "wealthy_family_essential_r",
    "educated_parents_essential_r",
    "having_ambition_essential_r",
    "hard_work_essential_r",
    "talent_essential_r",
    "luck_essential_r",
    "connections_essential_r",
    "good_education_essential_r",
    "political_influence_essential_r",
]
for item in essential_items_r:
    data7[item] = data7[item].map(importance_codes)

data7.head(3)

In [None]:
# Recode the two opportunity items to 1-3 (3 = most opportunity).
# Column -> {answer label: code}; unmatched labels become NaN.
recodes = {
    "enough_opportunities": {
        "No or very little opportunity": 1,
        "Some opportunity": 2,
        "Plenty of opportunity": 3,
    },
    "more_opportunities_than_parents": {"Less": 1, "Same": 2, "More": 3},
}
for column, codes in recodes.items():
    data7[column] = data7[column].map(codes)

data7.head(3)

In [None]:
# Recode the "_r" opportunity items to 1-3 with the same label-to-code
# mapping as the plain items; unmatched labels become NaN.
recodes = {
    "enough_opportunities_r": {
        "No or very little opportunity": 1,
        "Some opportunity": 2,
        "Plenty of opportunity": 3,
    },
    "more_opportunities_than_parents_r": {"Less": 1, "Same": 2, "More": 3},
}
for column, codes in recodes.items():
    data7[column] = data7[column].map(codes)

data7.head(3)

In [None]:
# Recode the two 7-point items. The answer numbered "1" maps to code 7 and
# the answer numbered "7" to code 1, so the displayed numbering is flipped.
recodes = {
    "hard_work_brings_better_life": {
        "1 - In the long run, hard work usually brings a better life": 7,
        "2": 6, "3": 5, "4": 4, "5": 3, "6": 2,
        "7 - Hard work doesn’t generally bring success—it’s more a matter of luck and connections": 1,
    },
    "people_poor_because_laziness": {
        "1 - People are poor because of laziness and lack of willpower": 7,
        "2": 6, "3": 5, "4": 4, "5": 3, "6": 2,
        "7 - People are poor because of an unfair society": 1,
    },
}
for column, codes in recodes.items():
    data7[column] = data7[column].map(codes)

data7.head(3)

In [None]:
# Recode the "_r" versions of the two 7-point items. Here the endpoint
# labels are swapped relative to the plain items, so the displayed number is
# kept as the code (1 -> 1, ..., 7 -> 7).
recodes = {
    "hard_work_brings_better_life_r": {
        "1 - Hard work doesn’t generally bring success—it’s more a matter of luck and connections": 1,
        "2": 2, "3": 3, "4": 4, "5": 5, "6": 6,
        "7 - In the long run, hard work usually brings a better life": 7,
    },
    "people_poor_because_laziness_r": {
        "1 - People are poor because of an unfair society": 1,
        "2": 2, "3": 3, "4": 4, "5": 5, "6": 6,
        "7 - People are poor because of laziness and lack of willpower": 7,
    },
}
for column, codes in recodes.items():
    data7[column] = data7[column].map(codes)

data7.head(3)

In [None]:
# Recode the two "deserve" items from frequency labels to 1-5 (5 = "Always").
frequency_codes = {
    "Never": 1,
    "Rarely": 2,
    "Sometimes": 3,
    "Most of the time": 4,
    "Always": 5,
}
for item in ["high_earners_deserve_high", "low_earners_deserve_low"]:
    data7[item] = data7[item].map(frequency_codes)

data7.head(3)

In [None]:
# Recode the "_r" "deserve" items from frequency labels to 1-5 (5 = "Always").
frequency_codes = {
    "Never": 1,
    "Rarely": 2,
    "Sometimes": 3,
    "Most of the time": 4,
    "Always": 5,
}
for item in ["high_earners_deserve_high_r", "low_earners_deserve_low_r"]:
    data7[item] = data7[item].map(frequency_codes)

data7.head(3)

In [None]:
# Recode the three "serious problem" items to 1-5 (5 = "A very serious
# problem") and the trend item to 1-3 (3 = "Increasing").
problem_codes = {
    "Not a problem at all": 1,
    "A small problem": 2,
    "A problem": 3,
    "A serious problem": 4,
    "A very serious problem": 5,
}
trend_codes = {"Decreasing": 1, "Same": 2, "Increasing": 3}

for item in [
    "ineq_serious_problem",
    "poverty_serious_problem",
    "unequal_hcare_serious_problem",
]:
    data7[item] = data7[item].map(problem_codes)

data7["ineq_increasing"] = data7["ineq_increasing"].map(trend_codes)

data7.head(3)

In [None]:
# Recode the "_r" "serious problem" items to 1-5 (5 = "A very serious
# problem") and the "_r" trend item to 1-3 (3 = "Increasing").
problem_codes = {
    "Not a problem at all": 1,
    "A small problem": 2,
    "A problem": 3,
    "A serious problem": 4,
    "A very serious problem": 5,
}
trend_codes = {"Decreasing": 1, "Same": 2, "Increasing": 3}

for item in [
    "ineq_serious_problem_r",
    "poverty_serious_problem_r",
    "unequal_hcare_serious_problem_r",
]:
    data7[item] = data7[item].map(problem_codes)

data7["ineq_increasing_r"] = data7["ineq_increasing_r"].map(trend_codes)

data7.head(3)

In [None]:
# Recode the five *_effective items. The answer numbered "1" is "Extremely
# effective" and maps to code 5, so the displayed numbering is flipped.
effectiveness_codes = {
    "1 - Extremely effective": 5,
    "2": 4,
    "3": 3,
    "4": 2,
    "5 - Not at all effective": 1,
}
for item in [
    "govt_regulation_effective",
    "govt_transfers_effective",
    "progressive_taxes_effective",
    "educ_policies_effective",
    "private_charity_effective",
]:
    data7[item] = data7[item].map(effectiveness_codes)

data7.head(3)

In [None]:
# Recode the five *_effective_r items. Here the answer numbered "5" is
# "Extremely effective", so the displayed number is kept as the code.
effectiveness_codes_r = {
    "1 - Not at all effective": 1,
    "2": 2,
    "3": 3,
    "4": 4,
    "5 - Extremely effective": 5,
}
for item in [
    "govt_regulation_effective_r",
    "govt_transfers_effective_r",
    "progressive_taxes_effective_r",
    "educ_policies_effective_r",
    "private_charity_effective_r",
]:
    data7[item] = data7[item].map(effectiveness_codes_r)

data7.head(3)

In [None]:
# Recode the two 7-point "should decrease inequality" items (displayed
# number kept as the code) and the categorical "who is most responsible" item.
recodes = {
    "govt_should_decrease_ineq": {
        "1 - The government should not concern itself with reducing income differences": 1,
        "2": 2, "3": 3, "4": 4, "5": 5, "6": 6,
        (
            "7 - The government ought to reduce the income differences between rich and poor—perhaps by "
            "raising the taxes of wealthy families or by giving income assistance to the poor"
        ): 7,
    },
    "companies_should_decrease_ineq": {
        "1 - Major companies should not concern themselves with reducing pay differences": 1,
        "2": 2, "3": 3, "4": 4, "5": 5, "6": 6,
        (
            "7 - Major companies ought to reduce the pay differences between employees with high pay "
            "and those with low pay—perhaps by reducing the pay of executives or by increasing the pay of unskilled workers"
        ): 7,
    },
    "who_most_responsible": {
        "Income differences do not need to be reduced": 1,
        "Low income individuals themselves": 2,
        "High income individuals themselves": 3,
        "Private charities": 4,
        "Major companies": 5,
        "Government": 6,
    },
}
for column, codes in recodes.items():
    data7[column] = data7[column].map(codes)

data7.head(3)

In [None]:
# Recode the "_r" versions. For the two 7-point items the endpoint labels are
# swapped relative to the plain items, so the displayed number is flipped
# (1 -> 7, ..., 7 -> 1). The categorical item uses the same codes as the
# plain one.
recodes = {
    "govt_should_decrease_ineq_r": {
        (
            "1 - The government ought to reduce the income differences between rich and poor—perhaps by "
            "raising the taxes of wealthy families or by giving income assistance to the poor"
        ): 7,
        "2": 6, "3": 5, "4": 4, "5": 3, "6": 2,
        "7 - The government should not concern itself with reducing income differences": 1,
    },
    "companies_should_decrease_ineq_r": {
        (
            "1 - Major companies ought to reduce the pay differences between employees with high pay "
            "and those with low pay—perhaps by reducing the pay of executives or by increasing the pay of unskilled workers"
        ): 7,
        "2": 6, "3": 5, "4": 4, "5": 3, "6": 2,
        "7 - Major companies should not concern themselves with reducing pay differences": 1,
    },
    "who_most_responsible_r": {
        "Income differences do not need to be reduced": 1,
        "Low income individuals themselves": 2,
        "High income individuals themselves": 3,
        "Private charities": 4,
        "Major companies": 5,
        "Government": 6,
    },
}
for column, codes in recodes.items():
    data7[column] = data7[column].map(codes)

data7.head(3)

In [None]:
# Recode the residency answer to a 0/1 indicator ("Yes" = 1).
yes_no_codes = {"No": 0, "Yes": 1}
data7["is_resident"] = data7["is_resident"].map(yes_no_codes)

data7.head(3)

In [None]:
# Recode gender, marital status and parenthood to numeric codes and cast age
# to integers. astype(int) raises if age holds missing or non-numeric values.
gender_codes = {"Male": 1, "Female": 2, "Other": 3}
married_codes = {"Single": 0, "Married": 1}
yes_no_codes = {"No": 0, "Yes": 1}

data7["gender"] = data7["gender"].map(gender_codes)
data7["age"] = data7["age"].astype(int)
data7["married"] = data7["married"].map(married_codes)
data7["has_children"] = data7["has_children"].map(yes_no_codes)

data7.head(3)

In [None]:
# Recode race, religion and education from answer labels to integer codes.
# Column -> {answer label: code}; unmatched labels become NaN.
recodes = {
    "race": {
        "European American/White": 1,
        "African American/Black": 2,
        "Hispanic/Latino": 3,
        "Asian/Asian American": 4,
        "Other": 5,
    },
    "religion": {
        "Christian (Protestant)": 1,
        "Christian (Catholic)": 2,
        "Christian (Mormon)": 3,
        "Christian (Other)": 4,
        "Jewish": 5,
        "Muslim": 6,
        "Hindu": 7,
        "Buddhist": 8,
        "Other religion": 9,
        "No religion": 10,
    },
    "education": {
        "Eighth Grade or Less": 1,
        "Some High School": 2,
        "High School Degree/GED": 3,
        "Some College": 4,
        "2-year College Degree": 5,
        "4-year College Degree": 6,
        "Master's Degree": 7,
        "Doctoral Degree": 8,
        "Professional Degree (JD, MD, MBA)": 9,
    },
}
for column, codes in recodes.items():
    data7[column] = data7[column].map(codes)

data7.head(3)

In [None]:
# Recode employment status from answer labels to codes 1-6.
employment_codes = {
    "Full-time employee": 1,
    "Part-time employee": 2,
    "Self-employed or small business owner": 3,
    "Unemployed and looking for work": 4,
    "Student": 5,
    "Not in labor force (for example: retired, or full-time parent)": 6,
}
data7["emp_status"] = data7["emp_status"].map(employment_codes)

In [None]:
# Recode household income brackets to 1-12, subjective relative income to
# 1-5 and income volatility to 1-3 (3 = varies a lot).
# Series.map turns any label that is not in the mapping into NaN.
income_bracket_codes = {
    "$0 - $9,999": 1,
    "$10,000 - $14,999": 2,
    "$15,000 - $19,999": 3,
    "$20,000 - $29,999": 4,
    "$30,000 - $39,999": 5,
    "$40,000 - $49,999": 6,
    "$50,000 - $74,999": 7,
    "$75,000 - $99,999": 8,
    # The original mapping spells this bracket "$100,00 - $124,999". Which
    # spelling the export uses is not visible here, so both are accepted;
    # otherwise the whole bracket would silently become NaN.
    "$100,00 - $124,999": 9,
    "$100,000 - $124,999": 9,
    "$125,000 - $149,999": 10,
    "$150,000 - $199,999": 11,
    "$200,000+": 12,
}
relative_income_codes = {
    "Far below average": 1,
    "Below average": 2,
    "Average": 3,
    "Above average": 4,
    "Far above average": 5,
}
volatility_codes = {
    "Income is about the same each month": 1,
    "Income varies somewhat from month to month": 2,
    "Income varies a lot from month to month": 3,
}

data7["household_income"] = data7["household_income"].map(income_bracket_codes)
data7["subj_relative_income"] = data7["subj_relative_income"].map(relative_income_codes)
data7["income_volatile"] = data7["income_volatile"].map(volatility_codes)

data7.head(3)

In [None]:
# Recode the "_r" income items: household income brackets to 1-12,
# subjective relative income to 1-5 and income volatility to 1-3.
# Series.map turns any label that is not in the mapping into NaN.
income_bracket_codes = {
    "$0 - $9,999": 1,
    "$10,000 - $14,999": 2,
    "$15,000 - $19,999": 3,
    "$20,000 - $29,999": 4,
    "$30,000 - $39,999": 5,
    "$40,000 - $49,999": 6,
    "$50,000 - $74,999": 7,
    "$75,000 - $99,999": 8,
    # The original mapping spells this bracket "$100,00 - $124,999". Which
    # spelling the export uses is not visible here, so both are accepted;
    # otherwise the whole bracket would silently become NaN.
    "$100,00 - $124,999": 9,
    "$100,000 - $124,999": 9,
    "$125,000 - $149,999": 10,
    "$150,000 - $199,999": 11,
    "$200,000+": 12,
}
relative_income_codes = {
    "Far below average": 1,
    "Below average": 2,
    "Average": 3,
    "Above average": 4,
    "Far above average": 5,
}
volatility_codes = {
    "Income is about the same each month": 1,
    "Income varies somewhat from month to month": 2,
    "Income varies a lot from month to month": 3,
}

data7["household_income_r"] = data7["household_income_r"].map(income_bracket_codes)
data7["subj_relative_income_r"] = data7["subj_relative_income_r"].map(relative_income_codes)
data7["income_volatile_r"] = data7["income_volatile_r"].map(volatility_codes)

data7.head(3)

In [None]:
# Recode the two sol_better_* items to 1-5 (5 = "Much better").
better_worse_codes = {
    "Much worse": 1,
    "Somewhat worse": 2,
    "About the same": 3,
    "Somewhat better": 4,
    "Much better": 5,
}
for item in ["sol_better_than_past", "sol_better_in_future"]:
    data7[item] = data7[item].map(better_worse_codes)

data7.head(3)

In [None]:
# Recode the "_r" sol_better_* items to 1-5 (5 = "Much better").
better_worse_codes = {
    "Much worse": 1,
    "Somewhat worse": 2,
    "About the same": 3,
    "Somewhat better": 4,
    "Much better": 5,
}
for item in ["sol_better_than_past_r", "sol_better_in_future_r"]:
    data7[item] = data7[item].map(better_worse_codes)

data7.head(3)

In [None]:
# Recode ideology to 1-5 (5 = "Very liberal") and party to codes 1-4.
# NOTE(review): the party answer "None" only reaches this mapping if the CSV
# reader kept it as a string instead of parsing it as missing -- confirm.
ideology_codes = {
    "Very conservative": 1,
    "Conservative": 2,
    "Moderate": 3,
    "Liberal": 4,
    "Very liberal": 5,
}
party_codes = {"Republican": 1, "Democrat": 2, "Independent": 3, "None": 4}

data7["liberal"] = data7["liberal"].map(ideology_codes)
data7["political_party"] = data7["political_party"].map(party_codes)

data7.head(3)

In [None]:
# Recode the "_r" ideology item to 1-5 (5 = "Very liberal") and the "_r"
# party item to codes 1-4.
# NOTE(review): the party answer "None" only reaches this mapping if the CSV
# reader kept it as a string instead of parsing it as missing -- confirm.
ideology_codes = {
    "Very conservative": 1,
    "Conservative": 2,
    "Moderate": 3,
    "Liberal": 4,
    "Very liberal": 5,
}
party_codes = {"Republican": 1, "Democrat": 2, "Independent": 3, "None": 4}

data7["liberal_r"] = data7["liberal_r"].map(ideology_codes)
data7["political_party_r"] = data7["political_party_r"].map(party_codes)

data7.head(3)

In [None]:
# Recode news-following frequency to 1-5 (5 = "Every day") and confidence in
# science to 1-3 (3 = "A great deal of confidence").
recodes = {
    "follow_news": {
        "Never": 1,
        "Less than once a week": 2,
        "Once a week": 3,
        "A few times a week": 4,
        "Every day": 5,
    },
    "has_confidence_in_science": {
        "Hardly any confidence at all": 1,
        "Only some confidence": 2,
        "A great deal of confidence": 3,
    },
}
for column, codes in recodes.items():
    data7[column] = data7[column].map(codes)

data7.head(3)

In [None]:
# Recode the "_r" news-following item to 1-5 (5 = "Every day") and the "_r"
# confidence-in-science item to 1-3 (3 = "A great deal of confidence").
recodes = {
    "follow_news_r": {
        "Never": 1,
        "Less than once a week": 2,
        "Once a week": 3,
        "A few times a week": 4,
        "Every day": 5,
    },
    "has_confidence_in_science_r": {
        "Hardly any confidence at all": 1,
        "Only some confidence": 2,
        "A great deal of confidence": 3,
    },
}
for column, codes in recodes.items():
    data7[column] = data7[column].map(codes)

data7.head(3)

In [None]:
# Recode both parents' education to codes 1-9, with "Not Applicable" coded
# as 10 (so 10 is a category marker, not a level above code 9).
parent_education_codes = {
    "Eighth Grade or Less": 1,
    "Some High School": 2,
    "High School Degree/GED": 3,
    "Some College": 4,
    "2-year College Degree": 5,
    "4-year College Degree": 6,
    "Master's Degree": 7,
    "Doctoral Degree": 8,
    "Professional Degree (JD, MD, MBA)": 9,
    "Not Applicable": 10,
}
for parent_column in ["father_educ", "mother_educ"]:
    data7[parent_column] = data7[parent_column].map(parent_education_codes)

In [None]:
# Recode the two parent-comparison items to 1-5, with "Not Applicable" coded
# as 6 (a category marker, not a point above 5 on the scale).
recodes = {
    "subj_relative_income_16_yrs": {
        "Far below average": 1,
        "Below average": 2,
        "Average": 3,
        "Above average": 4,
        "Far above average": 5,
        "Not Applicable": 6,
    },
    "sol_better_than_parents": {
        "Much worse": 1,
        "Somewhat worse": 2,
        "About the same": 3,
        "Somewhat better": 4,
        "Much better": 5,
        "Not Applicable": 6,
    },
}
for column, codes in recodes.items():
    data7[column] = data7[column].map(codes)

data7.head(3)

In [None]:
# Recode the "_r" parent-comparison items to 1-5, with "Not Applicable"
# coded as 6 (a category marker, not a point above 5 on the scale).
recodes = {
    "subj_relative_income_16_yrs_r": {
        "Far below average": 1,
        "Below average": 2,
        "Average": 3,
        "Above average": 4,
        "Far above average": 5,
        "Not Applicable": 6,
    },
    "sol_better_than_parents_r": {
        "Much worse": 1,
        "Somewhat worse": 2,
        "About the same": 3,
        "Somewhat better": 4,
        "Much better": 5,
        "Not Applicable": 6,
    },
}
for column, codes in recodes.items():
    data7[column] = data7[column].map(codes)

data7.head(3)

In [None]:
# Recode the coronavirus items: threat 1-5, economy-vs-lives 1-5 (displayed
# number kept as the code), the three satisfaction items 1-5
# (5 = "Very satisfied"), and days out as the integers 0-7.
satisfaction_codes = {
    "Not satisfied at all": 1,
    "Not very satisfied": 2,
    "Neither satisfied nor dissatisfied": 3,
    "Fairly satisfied": 4,
    "Very satisfied": 5,
}
recodes = {
    "coronavirus_serious_threat": {
        "Not a threat at all": 1,
        "A small threat": 2,
        "A threat": 3,
        "A serious threat": 4,
        "A very serious threat": 5,
    },
    "must_save_economy": {
        "1 - Saving lives must be the priority even if it means the economy will suffer": 1,
        "2": 2,
        "3": 3,
        "4": 4,
        "5 - Saving the economy must be the priority even if it means lives will be lost": 5,
    },
    "satisfied_with_city": satisfaction_codes,
    "satisfied_with_state": satisfaction_codes,
    "satisfied_with_federal_govt": satisfaction_codes,
    # "0" -> 0, "1" -> 1, ..., "7" -> 7
    "days_out_in_past_week": {str(days): days for days in range(8)},
}
for column, codes in recodes.items():
    data7[column] = data7[column].map(codes)

data7.head(3)

In [None]:
# Recode the "_r" coronavirus items. The economy-vs-lives item has its
# endpoint labels swapped relative to the plain item, so its displayed number
# is flipped (1 -> 5, ..., 5 -> 1). The other items use the same codes as
# their plain counterparts.
satisfaction_codes = {
    "Not satisfied at all": 1,
    "Not very satisfied": 2,
    "Neither satisfied nor dissatisfied": 3,
    "Fairly satisfied": 4,
    "Very satisfied": 5,
}
recodes = {
    "coronavirus_serious_threat_r": {
        "Not a threat at all": 1,
        "A small threat": 2,
        "A threat": 3,
        "A serious threat": 4,
        "A very serious threat": 5,
    },
    "must_save_economy_r": {
        "1 - Saving the economy must be the priority even if it means lives will be lost": 5,
        "2": 4,
        "3": 3,
        "4": 2,
        "5 - Saving lives must be the priority even if it means the economy will suffer": 1,
    },
    "satisfied_with_city_r": satisfaction_codes,
    "satisfied_with_state_r": satisfaction_codes,
    "satisfied_with_federal_govt_r": satisfaction_codes,
    # "0" -> 0, "1" -> 1, ..., "7" -> 7
    "days_out_in_past_week_r": {str(days): days for days in range(8)},
}
for column, codes in recodes.items():
    data7[column] = data7[column].map(codes)

data7.head(3)

### Combine regular and reverse-coded columns

In [None]:
# Continue on a copy so the recoded frame `data7` is left unchanged by the merge below.
data8 = data7.copy()

In [None]:
# Fill gaps in each base column with the value from its `_r` counterpart.
# The result is assigned back to the frame: calling fillna(inplace=True) on a selected
# column is a chained in-place update, which pandas warns about and which leaves the
# frame untouched once Copy-on-Write is active.
for base_col in ('wealthy_family_essential',
                 'educated_parents_essential',
                 'having_ambition_essential',
                 'hard_work_essential',
                 'talent_essential',
                 'luck_essential',
                 'connections_essential',
                 'good_education_essential',
                 'political_influence_essential'):
    data8[base_col] = data8[base_col].fillna(data8[base_col + '_r'])

In [None]:
# Fill gaps in each base column with its `_r` counterpart and assign the result back
# (a chained fillna(inplace=True) on a selected column is not guaranteed to update the frame).
for base_col in ('enough_opportunities',
                 'more_opportunities_than_parents'):
    data8[base_col] = data8[base_col].fillna(data8[base_col + '_r'])

In [None]:
# Fill gaps in each base column with its `_r` counterpart and assign the result back
# (a chained fillna(inplace=True) on a selected column is not guaranteed to update the frame).
for base_col in ('hard_work_brings_better_life',
                 'people_poor_because_laziness'):
    data8[base_col] = data8[base_col].fillna(data8[base_col + '_r'])

In [None]:
# Fill gaps in each base column with its `_r` counterpart and assign the result back
# (a chained fillna(inplace=True) on a selected column is not guaranteed to update the frame).
for base_col in ('high_earners_deserve_high',
                 'low_earners_deserve_low'):
    data8[base_col] = data8[base_col].fillna(data8[base_col + '_r'])

In [None]:
# Fill gaps in each base column with its `_r` counterpart and assign the result back
# (a chained fillna(inplace=True) on a selected column is not guaranteed to update the frame).
for base_col in ('ineq_serious_problem',
                 'poverty_serious_problem',
                 'unequal_hcare_serious_problem',
                 'ineq_increasing'):
    data8[base_col] = data8[base_col].fillna(data8[base_col + '_r'])

In [None]:
# Fill gaps in each base column with its `_r` counterpart and assign the result back
# (a chained fillna(inplace=True) on a selected column is not guaranteed to update the frame).
for base_col in ('govt_regulation_effective',
                 'govt_transfers_effective',
                 'progressive_taxes_effective',
                 'educ_policies_effective',
                 'private_charity_effective'):
    data8[base_col] = data8[base_col].fillna(data8[base_col + '_r'])

In [None]:
# Fill gaps in each base column with its `_r` counterpart and assign the result back
# (a chained fillna(inplace=True) on a selected column is not guaranteed to update the frame).
for base_col in ('govt_should_decrease_ineq',
                 'companies_should_decrease_ineq',
                 'who_most_responsible'):
    data8[base_col] = data8[base_col].fillna(data8[base_col + '_r'])

In [None]:
# Fill gaps in each base column with its `_r` counterpart and assign the result back
# (a chained fillna(inplace=True) on a selected column is not guaranteed to update the frame).
for base_col in ('household_income',
                 'subj_relative_income',
                 'income_volatile',
                 'sol_better_than_past',
                 'sol_better_in_future',
                 'liberal',
                 'political_party',
                 'follow_news',
                 'has_confidence_in_science'):
    data8[base_col] = data8[base_col].fillna(data8[base_col + '_r'])

In [None]:
# Fill gaps in each base column with its `_r` counterpart and assign the result back
# (a chained fillna(inplace=True) on a selected column is not guaranteed to update the frame).
for base_col in ('subj_relative_income_16_yrs',
                 'sol_better_than_parents'):
    data8[base_col] = data8[base_col].fillna(data8[base_col + '_r'])

In [None]:
# Fill gaps in each base column with its `_r` counterpart and assign the result back
# (a chained fillna(inplace=True) on a selected column is not guaranteed to update the frame).
for base_col in ('coronavirus_serious_threat',
                 'must_save_economy',
                 'satisfied_with_city',
                 'satisfied_with_state',
                 'satisfied_with_federal_govt',
                 'how_affected_by_coronavirus',
                 'how_affected_by_coronavirus_other',
                 'days_out_in_past_week'):
    data8[base_col] = data8[base_col].fillna(data8[base_col + '_r'])

In [None]:
# Fill gaps in the answer-order columns from their reverse-version counterparts.
# The result is assigned back because a chained fillna(inplace=True) on a selected
# column is not guaranteed to update the frame.
for base_col, reverse_col in (('GMC.3_DO', 'GMC.3R_DO'),
                              ('DQ18_DO', 'DQ18R_DO')):
    data8[base_col] = data8[base_col].fillna(data8[reverse_col])

In [None]:
# Show the first three rows to inspect the combined columns.
data8.head(3)

### Parse display orders

In [None]:
# Continue on a copy so `data8` is left unchanged by the display-order parsing.
data9 = data8.copy()

In [None]:
# Survey-flow block ID -> (question-code prefix, analysis names of items 1..n).
_OPINION_BLOCKS = {
    "FL_72": ("DS", ["wealthy_family_essential", "educated_parents_essential",
                     "having_ambition_essential", "hard_work_essential",
                     "talent_essential", "luck_essential", "connections_essential",
                     "good_education_essential", "political_influence_essential"]),
    "FL_73": ("EO", ["enough_opportunities", "more_opportunities_than_parents"]),
    "FL_74": ("LHW", ["hard_work_brings_better_life", "people_poor_because_laziness"]),
    "FL_75": ("ED", ["high_earners_deserve_high", "low_earners_deserve_low"]),
    "FL_76": ("ISP", ["ineq_serious_problem", "poverty_serious_problem",
                      "unequal_hcare_serious_problem", "ineq_increasing"]),
    "FL_70": ("GT", ["govt_regulation_effective", "govt_transfers_effective",
                     "progressive_taxes_effective", "educ_policies_effective",
                     "private_charity_effective"]),
    "FL_71": ("GMC", ["govt_should_decrease_ineq", "companies_should_decrease_ineq",
                      "who_most_responsible"]),
}

# Codes that appear in the DS display order but have no analysis column; they are dropped.
_SKIPPED_CODES = {"DS.0", "DS.0R"}


def get_opinion_q_order(row):
    """Return the opinion questions in the order this respondent saw them.

    The order is rebuilt from the recorded display-order strings ("|"-separated):
    first whether the perceptions or the preferences group came first, then the
    order of the question blocks inside each group, then the order of the
    questions inside each block (taken from the regular column, or from the
    ``*_Reverse_DO`` column when the regular one is NaN).

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row)
        Must provide 'perceptions_or_preferences_DO', 'perceptions_DO',
        'preferences_DO' and, for every block listed there, '<prefix>_DO' and
        '<prefix>_Reverse_DO'.

    Returns
    -------
    list of str
        Analysis column names in display order. Codes without a known name are
        passed through unchanged; "DS.0"/"DS.0R" are omitted.

    Raises
    ------
    ValueError
        If the first entry of 'perceptions_or_preferences_DO' is neither
        "FL_60" nor "FL_61".
    """
    first_group = row['perceptions_or_preferences_DO'].split("|")[0]
    if first_group == "FL_60":
        block_ids = row['perceptions_DO'] + "|" + row["preferences_DO"]
    elif first_group == "FL_61":
        block_ids = row["preferences_DO"] + "|" + row['perceptions_DO']
    else:
        raise ValueError(
            "unexpected first entry in perceptions_or_preferences_DO: %r" % (first_group,))

    display_order = []
    for block_id in block_ids.split("|"):
        if block_id not in _OPINION_BLOCKS:
            continue  # not an opinion-question block
        prefix, names = _OPINION_BLOCKS[block_id]

        shown = row[prefix + "_DO"]
        suffix = ""
        if shown != shown:  # NaN is the only value unequal to itself
            shown = row[prefix + "_Reverse_DO"]
            suffix = "R"

        # Exact code -> name lookup. The earlier regex substitutions treated "." as a
        # wildcard and "DS.0|" as an alternation, which left an empty entry behind.
        name_by_code = {
            "%s.%d%s" % (prefix, position, suffix): name
            for position, name in enumerate(names, start=1)
        }
        for code in shown.split("|"):
            if code in _SKIPPED_CODES:
                continue
            display_order.append(name_by_code.get(code, code))
    return display_order

# Run get_opinion_q_order on every row (axis = 1) and keep the resulting list in a new column.
data9['opinion_q_DO'] = data9.apply(get_opinion_q_order, axis = 1)
data9.head()

### Fill in condition values

In [None]:
# Continue on a copy so `data9` is left unchanged when `condition` is filled in.
data10 = data9.copy()

In [None]:
def get_condition(row):
    """Translate the row's `text_shown` value into its experimental-condition label.

    Returns None when `text_shown` is not one of the six known text identifiers.
    """
    label_by_text = {
        'Coronavirustext':                  "corona_control",
        'Coronavirusnaturalinequalitytext': "corona_natural_ineq",
        'Coronavirusclassinequalitytext':   "corona_class_ineq",
        'Internettext':                     "control",
        'Naturalinequalitytext':            "natural_ineq",
        'Classinequalitytext':              "class_ineq",
    }
    return label_by_text.get(row['text_shown'])
    
# Replace the `condition` column with the label get_condition derives from each row's `text_shown`.
data10['condition'] = data10.apply(get_condition, axis = 1)
data10.head()

In [None]:
# Per-condition count of non-null values in every column (a quick check of group sizes).
data10.groupby("condition").count()

### Parse `how_affected_by_coronavirus` answers

In [None]:
# Continue on a copy so `data10` is left unchanged when the indicator columns are added.
data11 = data10.copy()

In [None]:
def get_how_affected_answers(row):
    """Split the row's comma-separated `how_affected_by_coronavirus` string into a list."""
    return row['how_affected_by_coronavirus'].split(",")

# Store each row's selected answers as a list so single options can be tested by membership.
data11['how_affected_by_coronavirus_lst'] = data11.apply(get_how_affected_answers, axis = 1)

In [None]:
def get_self_became_ill(row):
    """Return 1 if the "I contracted coronavirus and became ill." option is in the row's answer list, else 0."""
    selected = row['how_affected_by_coronavirus_lst']
    return int("I contracted coronavirus and became ill." in selected)
    
def get_family_member_became_ill(row):
    """Return 1 if the family-member-became-ill option is in the row's answer list, else 0."""
    selected = row['how_affected_by_coronavirus_lst']
    return int("Someone in my family contracted coronavirus and became ill." in selected)

def get_self_lost_job(row):
    """Return 1 if the "I lost my job because of coronavirus." option is in the row's answer list, else 0."""
    selected = row['how_affected_by_coronavirus_lst']
    return int("I lost my job because of coronavirus." in selected)
    
def get_family_member_lost_job(row):
    """Return 1 if the family-member-lost-job option is in the row's answer list, else 0."""
    selected = row['how_affected_by_coronavirus_lst']
    return int("Someone in my family lost their job because of coronavirus." in selected)
    
def get_self_lost_income(row):
    """Return 1 if the own-significant-income-decrease option is in the row's answer list, else 0."""
    selected = row['how_affected_by_coronavirus_lst']
    return int("I experienced a significant decrease in income due to coronavirus." in selected)
    
def get_family_member_lost_income(row):
    """Return 1 if the family-member-income-decrease option is in the row's answer list, else 0."""
    option = "Someone in my family experienced a significant decrease in income due to coronavirus."
    return 1 if option in row['how_affected_by_coronavirus_lst'] else 0
    
def get_self_at_risk(row):
    """Return 1 if the own-underlying-medical-condition option is in the row's answer list, else 0."""
    option = "I have an underlying medical condition that puts me at greater risk for severe illness."
    return 1 if option in row['how_affected_by_coronavirus_lst'] else 0
    
def get_family_member_at_risk(row):
    """Return 1 if the family-member-underlying-condition option is in the row's answer list, else 0."""
    option = ("Someone in my family has an underlying medical condition "
              "that puts them at greater risk for severe illness.")
    return 1 if option in row['how_affected_by_coronavirus_lst'] else 0
    
def get_not_affected(row):
    """Return 1 if the not-affected-in-any-major-way option is in the row's answer list, else 0."""
    selected = row['how_affected_by_coronavirus_lst']
    return int("I have not been affected by coronavirus in any major way." in selected)
    
def get_affected_in_other_way(row):
    """Return 1 if the "Other (please specify)" option is in the row's answer list, else 0."""
    selected = row['how_affected_by_coronavirus_lst']
    return int("Other (please specify)" in selected)

In [None]:
# 1/0 indicator: the respondent selected the option about becoming ill themselves.
data11['self_became_ill'] = data11.apply(get_self_became_ill, axis = 1)

In [None]:
# 1/0 indicator: the respondent selected the option about a family member becoming ill.
data11['family_member_became_ill'] = data11.apply(get_family_member_became_ill, axis = 1)

In [None]:
# 1/0 indicator: the respondent selected the option about losing their own job.
data11['self_lost_job'] = data11.apply(get_self_lost_job, axis = 1)

In [None]:
# 1/0 indicator: the respondent selected the option about a family member losing their job.
data11['family_member_lost_job'] = data11.apply(get_family_member_lost_job, axis = 1)

In [None]:
# 1/0 indicator: the respondent selected the option about their own significant income decrease.
data11['self_lost_income'] = data11.apply(get_self_lost_income, axis = 1)

In [None]:
# 1/0 indicator: the respondent selected the option about a family member's significant income decrease.
data11['family_member_lost_income'] = data11.apply(get_family_member_lost_income, axis = 1)

In [None]:
# 1/0 indicator: the respondent selected the option about having an underlying medical condition.
data11['self_at_risk'] = data11.apply(get_self_at_risk, axis = 1)

In [None]:
# 1/0 indicator: the respondent selected the option about a family member's underlying medical condition.
data11['family_member_at_risk'] = data11.apply(get_family_member_at_risk, axis = 1)

In [None]:
# 1/0 indicator: the respondent selected the "not affected in any major way" option.
data11['not_affected'] = data11.apply(get_not_affected, axis = 1)

In [None]:
# 1/0 indicator: the respondent selected "Other (please specify)".
data11['affected_in_other_way'] = data11.apply(get_affected_in_other_way, axis = 1)

In [None]:
# Show the first rows to inspect the new indicator columns.
data11.head()

### Keep necessary variables only

In [None]:
# Keep only the analysis variables, in export order.
# NOTE(review): "self_lost_job" and "family_member_lost_job" are computed on data11 like
# the other coronavirus-impact indicators but were missing from this list, so they never
# reached the exported files; they are included here. Confirm this matches the intended
# export schema.
data12 = data11[["condition",
                 "comprehension_q",
                 "wealthy_family_essential", "educated_parents_essential", "having_ambition_essential",
                 "hard_work_essential", "talent_essential", "luck_essential", "connections_essential",
                 "good_education_essential", "political_influence_essential",
                 "enough_opportunities", "more_opportunities_than_parents",
                 "hard_work_brings_better_life", "people_poor_because_laziness",
                 "high_earners_deserve_high", "low_earners_deserve_low",
                 "ineq_serious_problem", "poverty_serious_problem", "unequal_hcare_serious_problem",
                 "ineq_increasing",
                 "govt_regulation_effective", "govt_transfers_effective", "progressive_taxes_effective",
                 "educ_policies_effective", "private_charity_effective",
                 "govt_should_decrease_ineq", "companies_should_decrease_ineq",
                 "who_most_responsible",
                 "is_resident", "state",
                 "gender", "age", "married", "has_children",
                 "race", "religion", "education",
                 "emp_status", "occupation",
                 "household_income", "subj_relative_income", "income_volatile",
                 "sol_better_than_past", "sol_better_in_future",
                 "liberal", "political_party",
                 "follow_news", "has_confidence_in_science",
                 "father_occ", "mother_occ", "father_educ", "mother_educ",
                 "subj_relative_income_16_yrs", "sol_better_than_parents",
                 "coronavirus_serious_threat", "must_save_economy",
                 "satisfied_with_city", "satisfied_with_state", "satisfied_with_federal_govt",
                 "self_became_ill", "family_member_became_ill",
                 "self_lost_job", "family_member_lost_job",
                 "self_lost_income", "family_member_lost_income",
                 "self_at_risk", "family_member_at_risk", "not_affected", "affected_in_other_way",
                 "how_affected_by_coronavirus_other",
                 "days_out_in_past_week",
                 "opinion_q_DO",
                 "DS_regular_or_reverse", "EO_regular_or_reverse", "LHW_regular_or_reverse", 
                 "ED_regular_or_reverse", "ISP_regular_or_reverse", 
                 "GT_regular_or_reverse", "GMC_regular_or_reverse",
                 "Demog_end_regular_or_reverse", "Parents_end_regular_or_reverse", "Corona_regular_or_reverse",
                 "GMC.3_DO", "DQ18_DO"]]

print(len(data12))

In [None]:
# Coronavirus-framed subset: the three corona_* conditions, plus rows whose condition is missing.
corona_conditions = ["corona_control", "corona_natural_ineq", "corona_class_ineq"]
in_corona_condition = data12['condition'].isin(corona_conditions)
corona_condition_missing = data12['condition'].isna()
data13 = data12[in_corona_condition | corona_condition_missing]

print(len(data13))

In [None]:
# General (non-coronavirus) subset: the three plain conditions, plus rows whose condition is missing.
general_conditions = ["control", "natural_ineq", "class_ineq"]
in_general_condition = data12['condition'].isin(general_conditions)
general_condition_missing = data12['condition'].isna()
data14 = data12[in_general_condition | general_condition_missing]

print(len(data14))

### Export to csv

In [None]:
# Directory the processed CSV files are written to.
path_to_processed_data = os.path.join("path/to/processed/data")

In [None]:
# Full processed dataset (every condition).
data12.to_csv(os.path.join(path_to_processed_data, "survey-exp-data.csv"))

In [None]:
# Subset with the corona_* conditions (data13).
data13.to_csv(os.path.join(path_to_processed_data, "survey-exp-data-corona.csv"))

In [None]:
# Subset with the control / natural_ineq / class_ineq conditions (data14).
data14.to_csv(os.path.join(path_to_processed_data, "survey-exp-data-general.csv"))