# Matrix Visualization Notebook

This notebook is for getting the data we need from HCMST2017 for the matrix visualization, which answers the question:

How does frequency of sex influence quality of relationship?


In [63]:
# The original HMCST2017 file comes as a Stata file, so I exported it from Stata as a CSV
import csv

In [64]:
data = []
with open('hcmst2017.csv', newline='', encoding='mac_roman') as csvfile:
    reader = csv.DictReader(csvfile)
    i = 0
    for row in reader:
        if i == 0:
            keys = list(row.keys())
            output = ''
            for key in keys:
                output += key + '\n'
            print(output)
        i += 1
        data.append(row)

CaseID
CASEID_NEW
qflag
weight1
weight1_freqwt
weight2
weight1a
weight1a_freqwt
weight_combo
weight_combo_freqwt
duration
speed_flag
consent
xlgb
S1
S2
S3
DOV_Branch
Q3_Refused
Q4
Q5
Q6A
Q6B
Q9
Q10
Q11
Q12
Q14
Q15A7
Q16
Q16_Refused
Q17A
Q17B
Q17C
Q17D
Q19
Q20
Q21A_Year
Q21A_Month
Q21B_Year
Q21B_Month
Q21C_Year
Q21C_Month
Q21D_Year
Q21D_Month
w6_identity
w6_outness
w6_outness_timing
Q23
Q24_Refused
Q25
Q26
Q27
Q28
w6_friend_connect_1
w6_friend_connect_2
w6_friend_connect_3
w6_friend_connect_4
w6_friend_connect_Refused
Q32
Q34
Q35_Refused
w6_sex_frequency
w6_otherdate
w6_how_many
w6_how_meet_Refused
w6_otherdate_app
w6_how_many_app
Past_Partner_Q1
w6_relationship_end_nonmar
w6_breakup_nonmar
w6_relationship_end_mar
w6_who_breakup
Q5_2
Q6A_2
Q9B_2
Q10_2
Q11_2
Q12_2
Q14_2
Q15A7_2_1
Q16_2
Q16_2_Codes
Q17B_2
Q17C_2
Q17D_2
Q20_2
Q21A_2_Year
Q21A_2_Month
Q21B_2_Year
Q21B_2_Month
Q21C_2_Year
Q21C_2_Month
Q21D_2_Year
Q21D_2_Month
Q21E_2_Year
Q21E_2_Month
Q21F_2_start_range
Q21F_2_Year
Q21F_2_Mon

In [65]:
print(data[2000]["DOV_Branch"])

2


In [66]:
sex_frequency_types = set()
for datum in data:
    sex_frequency_types.add(datum["w6_sex_frequency"])

In [67]:
quality_types = set()
for datum in data:
    quality_types.add(datum["w6_relationship_quality"])
print(quality_types)

{'', 'excellent', 'very poor', 'good', 'poor', 'fair'}


In [68]:
frequency_counts_simple = dict()

In [69]:
for person in data:
    key = person["w6_sex_frequency"]
    
    if key not in frequency_counts_simple:
        frequency_counts_simple[key] = 0
    
    frequency_counts_simple[key] += 1
    

In [70]:
frequency_counts_simple

{'': 654,
 'Once a month or less': 969,
 '2 to 3 times a month': 519,
 '3 to 6 times a week': 368,
 'Once or twice a week': 744,
 'Once a day or more': 86,
 'Refused': 170}

In [71]:
total_respondents = 0
for key in frequency_counts_simple:
    if not key == '' and not key == 'Refused':
        total_respondents += frequency_counts_simple[key]
print(total_respondents)

2686


In [75]:
counts_per_quality = dict()

In [82]:
for person in data:
    key = person["w6_relationship_quality"]
        
    if key not in counts_per_quality:
        counts_per_quality[key] = 0
    
    counts_per_quality[key] += 1
    

In [83]:
counts_per_quality

{'excellent': 3412,
 'good': 1768,
 'fair': 404,
 'very poor': 46,
 'poor': 64,
 '': 663}

In [17]:
matrix_data = {}

In [18]:
frequency_counts = dict()
for person in data:
    key1 = person["w6_sex_frequency"]
    key2 = person["w6_relationship_quality"]
    
    if key1 not in frequency_counts:
        frequency_counts[key1] = {}
    
    if key2 not in frequency_counts[key1]:
        frequency_counts[key1][key2] = 0
        
    frequency_counts[key1][key2] += 1
    

In [19]:
frequency_counts

{'': {'': 654},
 'Once a month or less': {'excellent': 482,
  'good': 359,
  'fair': 95,
  'poor': 19,
  'very poor': 13,
  '': 1},
 '2 to 3 times a month': {'good': 156,
  'excellent': 324,
  'fair': 28,
  'very poor': 5,
  'poor': 6},
 '3 to 6 times a week': {'excellent': 256, 'fair': 21, 'good': 90, 'poor': 1},
 'Once or twice a week': {'good': 206,
  'excellent': 495,
  'fair': 36,
  'very poor': 1,
  '': 2,
  'poor': 4},
 'Once a day or more': {'excellent': 53,
  'fair': 9,
  'good': 22,
  'poor': 1,
  'very poor': 1},
 'Refused': {'good': 51,
  'excellent': 96,
  '': 6,
  'fair': 13,
  'very poor': 3,
  'poor': 1}}

In [33]:
# OLD
# create the fresh export_data

f = frequency_counts
export_data = []
for freq in f:
    if freq == '' or freq == 'Refused':
        continue
    if 'very poor' not in f[freq]:
        f[freq]['very poor'] = 0
        
    responses_by_freq = frequency_counts_simple[freq]
        
    export_data.append({
        "frequency": freq,
        "respondents": responses_by_freq,
        "ratings": {
            "num_respondents": [
                f[freq]['very poor'],
                f[freq]['poor'],
                f[freq]['fair'],
                f[freq]['good'],
                f[freq]['excellent']
            ],
            "ratio_in_this_freq": [
                f[freq]['very poor'] / responses_by_freq,
                f[freq]['poor'] / responses_by_freq,
                f[freq]['fair'] / responses_by_freq,
                f[freq]['good'] / responses_by_freq,
                f[freq]['excellent'] / responses_by_freq
            ]
        }
    })

In [84]:
# create the fresh export_data

f = frequency_counts
export_data = []
for freq in f:
    if freq == '' or freq == 'Refused':
        continue
    if 'very poor' not in f[freq]:
        f[freq]['very poor'] = 0
        
    responses_by_freq = frequency_counts_simple[freq]
        
    counts = f[freq]
        
    export_data.append({
        "frequency": freq,
        "respondents": responses_by_freq,
        "respondents_per_quality": [
                f[freq]['very poor'],
                f[freq]['poor'],
                f[freq]['fair'],
                f[freq]['good'],
                f[freq]['excellent']
            ],
        "ratios_per_quality": {
            "across_all_freqs": [
                {
                    "ratio": f[freq]['very poor'] / total_respondents, 
                    "num_respondents": f[freq]['very poor']
                },
                {
                    "ratio": f[freq]['poor'] / total_respondents, 
                    "num_respondents": f[freq]['poor']
                },
                {
                    "ratio": f[freq]['fair'] / total_respondents, 
                    "num_respondents": f[freq]['fair']
                },
                {
                    "ratio": f[freq]['good'] / total_respondents, 
                    "num_respondents": f[freq]['good']
                },
                {
                    "ratio": f[freq]['excellent'] / total_respondents, 
                    "num_respondents": f[freq]['excellent']
                }
            ],
            "across_this_freq": [
                {
                    "ratio": f[freq]['very poor'] / responses_by_freq,
                    "num_respondents": f[freq]['very poor']
                },
                {
                    "ratio": f[freq]['poor'] / responses_by_freq,
                    "num_respondents": f[freq]['poor']
                },
                {
                    "ratio": f[freq]['fair'] / responses_by_freq,
                    "num_respondents": f[freq]['fair']
                },
                {
                    "ratio": f[freq]['good'] / responses_by_freq,
                    "num_respondents": f[freq]['good']
                },
                {
                    "ratio": f[freq]['excellent'] / responses_by_freq,
                    "num_respondents": f[freq]['excellent']
                },
            ],
            "across_qualities": [
                {
                    "ratio": f[freq]['very poor'] / counts_per_quality['very poor'],
                    "num_respondents": f[freq]['very poor']
                },
                {
                    "ratio": f[freq]['poor'] / counts_per_quality['poor'],
                    "num_respondents": f[freq]['poor']
                },
                {
                    "ratio": f[freq]['fair'] / counts_per_quality['fair'],
                    "num_respondents": f[freq]['fair']
                },
                {
                    "ratio": f[freq]['good'] / counts_per_quality['good'],
                    "num_respondents": f[freq]['good']
                },
                {
                    "ratio": f[freq]['excellent'] / counts_per_quality['excellent'],
                    "num_respondents": f[freq]['excellent']
                },
            ]
        }
    })

In [85]:
export_data

[{'frequency': 'Once a month or less',
  'respondents': 969,
  'respondents_per_quality': [13, 19, 95, 359, 482],
  'ratios_per_quality': {'across_all_freqs': [{'ratio': 0.004839910647803425,
     'num_respondents': 13},
    {'ratio': 0.007073715562174236, 'num_respondents': 19},
    {'ratio': 0.03536857781087118, 'num_respondents': 95},
    {'ratio': 0.1336559940431869, 'num_respondents': 359},
    {'ratio': 0.17944899478778853, 'num_respondents': 482}],
   'across_this_freq': [{'ratio': 0.013415892672858616, 'num_respondents': 13},
    {'ratio': 0.0196078431372549, 'num_respondents': 19},
    {'ratio': 0.09803921568627451, 'num_respondents': 95},
    {'ratio': 0.37048503611971106, 'num_respondents': 359},
    {'ratio': 0.49742002063983487, 'num_respondents': 482}],
   'across_qualities': [{'ratio': 0.2826086956521739, 'num_respondents': 13},
    {'ratio': 0.296875, 'num_respondents': 19},
    {'ratio': 0.23514851485148514, 'num_respondents': 95},
    {'ratio': 0.20305429864253394, 'n

In [86]:
import json

In [87]:
export_json = json.dumps(export_data, indent=2)

In [88]:
print(export_json)

[
  {
    "frequency": "Once a month or less",
    "respondents": 969,
    "respondents_per_quality": [
      13,
      19,
      95,
      359,
      482
    ],
    "ratios_per_quality": {
      "across_all_freqs": [
        {
          "ratio": 0.004839910647803425,
          "num_respondents": 13
        },
        {
          "ratio": 0.007073715562174236,
          "num_respondents": 19
        },
        {
          "ratio": 0.03536857781087118,
          "num_respondents": 95
        },
        {
          "ratio": 0.1336559940431869,
          "num_respondents": 359
        },
        {
          "ratio": 0.17944899478778853,
          "num_respondents": 482
        }
      ],
      "across_this_freq": [
        {
          "ratio": 0.013415892672858616,
          "num_respondents": 13
        },
        {
          "ratio": 0.0196078431372549,
          "num_respondents": 19
        },
        {
          "ratio": 0.09803921568627451,
          "num_respondents": 95
        },

In [89]:
import os 
data = export_json
os.system("echo '%s' | pbcopy" % data)

0