# Age Difference Scatterplot Data Processing Notebook

This notebook is for getting the data we need from HCMST2017 for the age difference and marital status scatterplot, which answers the question:

How does age difference influence marital status?


In [3]:
# The original HCMST2017 file comes as a Stata file, so I exported it from Stata as a CSV
import csv

In [4]:
# Read every row of the exported survey CSV into `data` as a dict keyed by
# column name. While on the first row, print the column names (one per line)
# so the variables needed for the scatterplot can be located by eye.
data = []
with open('hcmst2017.csv', newline='', encoding='mac_roman') as csvfile:
    for row_number, row in enumerate(csv.DictReader(csvfile)):
        if row_number == 0:
            # Column names are taken from the first row's keys, so nothing
            # is printed (and `keys` stays undefined) if the file has no rows.
            keys = list(row.keys())
            print(''.join(key + '\n' for key in keys))
        data.append(row)

CaseID
CASEID_NEW
qflag
weight1
weight1_freqwt
weight2
weight1a
weight1a_freqwt
weight_combo
weight_combo_freqwt
duration
speed_flag
consent
xlgb
S1
S2
S3
DOV_Branch
Q3_Refused
Q4
Q5
Q6A
Q6B
Q9
Q10
Q11
Q12
Q14
Q15A7
Q16
Q16_Refused
Q17A
Q17B
Q17C
Q17D
Q19
Q20
Q21A_Year
Q21A_Month
Q21B_Year
Q21B_Month
Q21C_Year
Q21C_Month
Q21D_Year
Q21D_Month
w6_identity
w6_outness
w6_outness_timing
Q23
Q24_Refused
Q25
Q26
Q27
Q28
w6_friend_connect_1
w6_friend_connect_2
w6_friend_connect_3
w6_friend_connect_4
w6_friend_connect_Refused
Q32
Q34
Q35_Refused
w6_sex_frequency
w6_otherdate
w6_how_many
w6_how_meet_Refused
w6_otherdate_app
w6_how_many_app
Past_Partner_Q1
w6_relationship_end_nonmar
w6_breakup_nonmar
w6_relationship_end_mar
w6_who_breakup
Q5_2
Q6A_2
Q9B_2
Q10_2
Q11_2
Q12_2
Q14_2
Q15A7_2_1
Q16_2
Q16_2_Codes
Q17B_2
Q17C_2
Q17D_2
Q20_2
Q21A_2_Year
Q21A_2_Month
Q21B_2_Year
Q21B_2_Month
Q21C_2_Year
Q21C_2_Month
Q21D_2_Year
Q21D_2_Month
Q21E_2_Year
Q21E_2_Month
Q21F_2_start_range
Q21F_2_Year
Q21F_2_Mon

In [5]:
# Look at the "DOV_Branch" value of a single row (index 2000) to see how the
# column is encoded -- presumably an arbitrary spot check; the index has no
# special meaning visible in this notebook.
sample_row = data[2000]
print(sample_row["DOV_Branch"])

2


In [6]:
# Distinct raw values found in the "ppage" column (the participant's age).
# Values are still strings at this point, exactly as read from the CSV.
participant_age = {record["ppage"] for record in data}
print(participant_age)

{'32', '45', '63', '67', '34', '23', '74', '76', '42', '21', '61', '79', '37', '59', '19', '68', '75', '44', '89', '78', '57', '35', '83', '40', '36', '27', '81', '33', '50', '70', '29', '86', '55', '30', '25', '26', '46', '56', '47', '41', '80', '85', '72', '71', '39', '91', '77', '66', '92', '54', '48', '73', '69', '65', '90', '22', '28', '58', '38', '87', '88', '49', '64', '31', '60', '51', '62', '52', '93', '53', '84', '24', '43', '82', '20', '18'}


In [7]:
# Distinct raw values found in the "Q9" column (the partner's age).
# Printing them exposes any non-numeric entries that must be filtered out
# before the ages can be converted to integers.
partner_age = {record["Q9"] for record in data}
print(partner_age)

{'', '67', '63', '17', '10', '61', '37', '75', '81', '40', '95', '50', '26', '41', '80', '47', '71', '77', '66', '54', '73', '69', '16', '90', '22', '28', '60', '45', '76', '21', '59', '19', '57', '36', '27', '70', '55', '85', '72', '87', '52', '53', '20', '68', '44', '89', '78', '83', '33', '86', '25', '56', '48', '58', '38', '49', '64', '51', '31', 'Refused', '32', '79', '34', '23', '74', '42', '35', '29', '30', '46', '39', '91', '65', '62', '14', '84', '24', '43', '82', '18'}


In [10]:
# Distinct categories found in the "ppmarit" column (marital status).
marital_status = {record["ppmarit"] for record in data}
print(marital_status)

{'Living with partner', 'Married', 'Never married', 'Separated', 'Widowed', 'Divorced'}


In [11]:
# Build the records for the scatterplot: one dict per respondent holding both
# ages as integers plus the marital status label.
# Respondents whose partner age ("Q9") is empty or the literal 'Refused' are
# left out, since those values cannot be converted to an integer.
export_data = [
    {
        "participant_age": int(person["ppage"]),
        "partner_age": int(person["Q9"]),
        "marital_status": person["ppmarit"],
    }
    for person in data
    if person["Q9"] and person["Q9"] != 'Refused'
]

In [12]:
# Display the full list of filtered records to eyeball the result.
# NOTE(review): this renders every record in the cell output; consider
# slicing (e.g. the first few items) if the notebook output gets too large.
export_data

[{'participant_age': 55, 'partner_age': 52, 'marital_status': 'Married'},
 {'participant_age': 47, 'partner_age': 45, 'marital_status': 'Married'},
 {'participant_age': 59, 'partner_age': 59, 'marital_status': 'Married'},
 {'participant_age': 59, 'partner_age': 60, 'marital_status': 'Married'},
 {'participant_age': 66, 'partner_age': 67, 'marital_status': 'Married'},
 {'participant_age': 65, 'partner_age': 65, 'marital_status': 'Married'},
 {'participant_age': 33, 'partner_age': 43, 'marital_status': 'Married'},
 {'participant_age': 37, 'partner_age': 35, 'marital_status': 'Married'},
 {'participant_age': 38, 'partner_age': 42, 'marital_status': 'Married'},
 {'participant_age': 34,
  'partner_age': 37,
  'marital_status': 'Living with partner'},
 {'participant_age': 46, 'partner_age': 40, 'marital_status': 'Married'},
 {'participant_age': 49, 'partner_age': 40, 'marital_status': 'Married'},
 {'participant_age': 66, 'partner_age': 72, 'marital_status': 'Married'},
 {'participant_age': 4

In [13]:
import json

In [14]:
# Serialize the list of records into a single JSON string (a JSON array of
# objects), using json.dumps with its default settings.
export_json = json.dumps(export_data)

In [15]:
# Display the serialized JSON string in the cell output.
# NOTE(review): no cell visible here writes export_json to a file --
# presumably the string is copied from this output for the scatterplot;
# confirm, and consider writing it to disk instead.
export_json

'[{"participant_age": 55, "partner_age": 52, "marital_status": "Married"}, {"participant_age": 47, "partner_age": 45, "marital_status": "Married"}, {"participant_age": 59, "partner_age": 59, "marital_status": "Married"}, {"participant_age": 59, "partner_age": 60, "marital_status": "Married"}, {"participant_age": 66, "partner_age": 67, "marital_status": "Married"}, {"participant_age": 65, "partner_age": 65, "marital_status": "Married"}, {"participant_age": 33, "partner_age": 43, "marital_status": "Married"}, {"participant_age": 37, "partner_age": 35, "marital_status": "Married"}, {"participant_age": 38, "partner_age": 42, "marital_status": "Married"}, {"participant_age": 34, "partner_age": 37, "marital_status": "Living with partner"}, {"participant_age": 46, "partner_age": 40, "marital_status": "Married"}, {"participant_age": 49, "partner_age": 40, "marital_status": "Married"}, {"participant_age": 66, "partner_age": 72, "marital_status": "Married"}, {"participant_age": 41, "partner_age"