## Load Data

In [6]:
import json

# Load the phenotype JSON schema; later cells read
# pheno_schema['definitions'][<instrument>]['properties'].
# JSON is decoded explicitly as UTF-8 so the result does not depend on the
# platform's locale-default encoding.
with open("../preprocessing/AutismPhenotype.json", encoding="utf-8") as schema_file:
    pheno_schema = json.load(schema_file)

In [20]:
import csv
import numpy as np

# Read the sample matrix: the first row is the header, the first column of
# every row is the sample name, and the remaining columns are the responses.
# newline="" is what the csv module requires so that quoted fields containing
# line breaks (and "\r\n" endings) are parsed correctly.
with open("../data/all_samples_both_instruments_filtered.csv", newline="") as f:
    reader = csv.reader(f)
    header = next(reader)[1:]  # drop the sample-name column label
    rows = list(reader)

# Built from `rows` directly (instead of zip(*...)) so a header-only file
# yields empty containers rather than a ValueError on unpacking.
sample_names = tuple(row[0] for row in rows)
data = np.asarray([row[1:] for row in rows])  # values stay as strings, as read from the CSV
print(data.shape)

(9100, 123)


## Types of ADIR questions

In [13]:
from collections import defaultdict

# Group ADIR schema properties by kind of question.
#   * keys ending in 'a'   -> 'categorical a' (stored without the trailing 'a')
#   * keys containing '.'  -> 'ordinal double' (stored as the part before the first '.')
#   * anything else        -> grouped under its own 'data-type' value
# Properties without a 'data-type' entry are ignored.
datatype_to_q = defaultdict(set)

adir_properties = pheno_schema['definitions']['ADIR']['properties']
for question, spec in adir_properties.items():
    if 'data-type' not in spec:
        continue
    if question.endswith('a'):
        group, label = 'categorical a', question[:-1]
    elif '.' in question:
        group, label = 'ordinal double', question.split('.')[0]
    else:
        group, label = spec['data-type'], question
    datatype_to_q[group].add(label)

# One tab-separated line per group: name, number of questions, the questions.
for group, questions in datatype_to_q.items():
    print(group, len(questions), questions, sep='\t')


score	16	{'communication', 'B1', 'A2', 'abnormality_evident_before_3_years', 'A3', 'C2', 'restricted_repetitive_behavior', 'social_interaction', 'A4', 'B3', 'C1', 'B4', 'C3', 'B2', 'A1', 'C4'}
interval	14	{'Q08', 'Q05', 'Q28', 'Q19', 'Q10', 'Q09', 'Q06', 'Q86', 'Q87', 'Q07', 'Q26', 'Q17', 'Q02', 'Q04'}
categorical a	145	{'Q84.2', 'Q82.2', 'Q64.1', 'Q65.2', 'Q78.2', 'Q48.1', 'Q36.2', 'Q36.1', 'Q34.1', 'Q34.2', 'Q61.1', 'Q71.1', 'Q32.2', 'Q57.2', 'Q69.2', 'Q04', 'Q08', 'Q92.1', 'Q28', 'Q29.1', 'Q10', 'Q70.2', 'Q27', 'Q52.2', 'Q92.2', 'Q56.1', 'Q46.2', 'Q33.1', 'Q80.1', 'Q18', 'Q60.1', 'Q55.2', 'Q72.1', 'Q83.2', 'Q31.1', 'Q42.2', 'Q51.2', 'Q59.2', 'Q90.1', 'Q66.1', 'Q72.2', 'Q50.1', 'Q38.2', 'Q16', 'Q35.2', 'Q15', 'Q20', 'Q47.1', 'Q85.1', 'Q17', 'Q43.2', 'Q49.2', 'Q48.2', 'Q05', 'Q51.1', 'Q43.1', 'Q68.1', 'Q39.1', 'Q14', 'Q69.1', 'Q26', 'Q41.1', 'Q12', 'Q42.1', 'Q44.2', 'Q93.1', 'Q50.2', 'Q68.2', 'Q07', 'Q90.2', 'Q52.1', 'Q31.2', 'Q53.2', 'Q35.1', 'Q54.1', 'Q39.2', 'Q41.2', 'Q66.2', 'Q02'

## Types of ADOS questions

In [22]:
from collections import defaultdict

# Group ADOS schema properties by kind of question.
#   * keys ending in 'a'   -> 'categorical a' (stored without the trailing 'a')
#   * keys containing '.'  -> 'ordinal double' (stored as the part before the first '.')
#   * anything else        -> grouped under its own 'data-type' value
# Properties without a 'data-type' entry are ignored.
datatype_to_q = defaultdict(set)

ados_properties = pheno_schema['definitions']['ADOS']['properties']
for question, spec in ados_properties.items():
    if 'data-type' not in spec:
        continue
    if question.endswith('a'):
        group, label = 'categorical a', question[:-1]
    elif '.' in question:
        group, label = 'ordinal double', question.split('.')[0]
    else:
        group, label = spec['data-type'], question
    datatype_to_q[group].add(label)

# One tab-separated line per group: name, number of questions, the questions.
for group, questions in datatype_to_q.items():
    print(group, len(questions), questions, sep='\t')


score	3	{'communication', 'restricted_repetitive_behavior', 'social_interaction'}
ordinal	46	{'QB06', 'QB14', 'QA07', 'QB23', 'QC02', 'QB21', 'QA12', 'QB10', 'QB18', 'QB01', 'QA02', 'QD03', 'QB02', 'QA05', 'QA06', 'QB17', 'QB11', 'QA08', 'QB15', 'QB08', 'QB16', 'QB09', 'QA01', 'QB19', 'QD04', 'QE03', 'QB22', 'QD01', 'QB07', 'QB05', 'QB20', 'QA10', 'QA03', 'QA13', 'QC01', 'QB03', 'QD02', 'QD05', 'QA11', 'QA09', 'QA04', 'QB12', 'QE02', 'QE01', 'QB04', 'QB13'}


In [50]:
# Per-module ADOS question mapping.
# Outer keys are the four ADOS module names. In each inner dict the key is a
# question id as used within that module and the value is the question id it
# is mapped to. The loop that follows this literal inverts the mapping into
# agg_mod_to_mods (value id -> {module: module question id}) and prints the
# value id as the first column of each table row.
# NOTE(review): the value ids look like a shared/aggregated ADOS numbering
# (cf. the 'ordinal' ids printed for the ADOS schema) — confirm against the schema.
feature_mapping = {
	# Module 1; "QB13.1"/"QB13.2" are the only dotted ids in this module.
	'ADOS_Module1': {
		"QA01": "QA01", "QA02": "QA11", "QA03": "QA02", "QA04": "QA03", "QA05": "QA04", 
		"QA06": "QA12", "QA07": "QA07", "QA08": "QA10",
		"QB01": "QB01", "QB02": "QB17", "QB03": "QB02", "QB04": "QB16", "QB05": "QB04",
		"QB06": "QB07", "QB07": "QB19", "QB08": "QB21", "QB09": "QB08", "QB10": "QB10", 
		"QB11": "QB09", "QB12": "QB13", "QB13.1": "QB11", "QB13.2": "QB23", "QB14": "QB14", "QB15": "QB22",
		"QB16": "QB15",
		"QC01": "QC01", "QC02": "QC02",
		"QD01": "QD01", "QD02": "QD02", "QD03": "QD03", "QD04": "QD04",
		"QE01": "QE01", "QE02": "QE02", "QE03": "QE03"
	},
	# Module 2; "QB09.1"/"QB09.2" map to the same targets as Module 1's "QB13.1"/"QB13.2".
	'ADOS_Module2': {
		"QA01": "QA01", "QA02": "QA02", "QA03": "QA03", "QA04": "QA04", "QA05": "QA09",
		"QA06": "QA07", "QA07": "QA10",
		"QB01": "QB01", "QB02": "QB02", "QB03": "QB04", "QB04": "QB07", "QB05": "QB08",
		"QB06": "QB10", "QB07": "QB09", "QB08": "QB13", "QB09.1": "QB11", "QB09.2": "QB23", "QB10": "QB14",
		"QB11": "QB12", "QB12": "QB15", 
		"QC01": "QC01", "QC02": "QC02",
		"QD01": "QD01", "QD02": "QD02", "QD03": "QD03", "QD04": "QD04",
		"QE01": "QE01", "QE02": "QE02", "QE03": "QE03"
	},
	# Module 3; has a single QC question, mapped to "QC02".
	'ADOS_Module3': {
		"QA01": "QA01", "QA02": "QA02", "QA03": "QA03", "QA04": "QA04", "QA05": "QA05", 
		"QA06": "QA06", "QA07": "QA08", "QA08": "QA09", "QA09": "QA10",
		"QB01": "QB01", "QB02": "QB02", "QB03": "QB03", "QB04": "QB04", "QB05": "QB05",
		"QB06": "QB06", "QB07": "QB13", "QB08": "QB11", "QB09": "QB14", "QB10": "QB12",
		"QB11": "QB15",
		"QC01": "QC02",
		"QD01": "QD01", "QD02": "QD02", "QD03": "QD03", "QD04": "QD04", "QD05": "QD05",
		"QE01": "QE01", "QE02": "QE02", "QE03": "QE03"

	},
	# Module 4; has a single QC question, mapped to "QC02".
	'ADOS_Module4': {
		"QA01": "QA01", "QA02": "QA02", "QA03": "QA03", "QA04": "QA04", "QA05": "QA05", 
		"QA06": "QA06", "QA07": "QA08", "QA08": "QA09", "QA09": "QA10", "QA10": "QA13",
		"QB01": "QB01", "QB02": "QB02", "QB03": "QB03", "QB04": "QB04", "QB05": "QB18",
		"QB06": "QB05", "QB07": "QB06", "QB08": "QB20", "QB09": "QB13", "QB10": "QB11",
		"QB11": "QB14", "QB12": "QB12", "QB13": "QB15",
		"QC01": "QC02",
		"QD01": "QD01", "QD02": "QD02", "QD03": "QD03", "QD04": "QD04", "QD05": "QD05",
		"QE01": "QE01", "QE02": "QE02", "QE03": "QE03"
	}
}

# Invert feature_mapping: mapped question id -> {module name: module question id}.
agg_mod_to_mods = defaultdict(dict)

for module, questions in feature_mapping.items():
    for module_question, agg_question in questions.items():
        agg_mod_to_mods[agg_question][module] = module_question

# Print one LaTeX table row per mapped question id, in sorted order:
# the mapped id followed by the corresponding id in each module (blank when
# the module has no such question), cells joined by ' & ' and ended by '\\'.
module_columns = ('ADOS_Module1', 'ADOS_Module2', 'ADOS_Module3', 'ADOS_Module4')
for agg_question in sorted(agg_mod_to_mods):
    per_module = agg_mod_to_mods[agg_question]
    cells = [agg_question] + [per_module.get(name, '') for name in module_columns]
    print(' & '.join(cells) + '\\\\')


QA01 & QA01 & QA01 & QA01 & QA01\\
QA02 & QA03 & QA02 & QA02 & QA02\\
QA03 & QA04 & QA03 & QA03 & QA03\\
QA04 & QA05 & QA04 & QA04 & QA04\\
QA05 &  &  & QA05 & QA05\\
QA06 &  &  & QA06 & QA06\\
QA07 & QA07 & QA06 &  & \\
QA08 &  &  & QA07 & QA07\\
QA09 &  & QA05 & QA08 & QA08\\
QA10 & QA08 & QA07 & QA09 & QA09\\
QA11 & QA02 &  &  & \\
QA12 & QA06 &  &  & \\
QA13 &  &  &  & QA10\\
QB01 & QB01 & QB01 & QB01 & QB01\\
QB02 & QB03 & QB02 & QB02 & QB02\\
QB03 &  &  & QB03 & QB03\\
QB04 & QB05 & QB03 & QB04 & QB04\\
QB05 &  &  & QB05 & QB06\\
QB06 &  &  & QB06 & QB07\\
QB07 & QB06 & QB04 &  & \\
QB08 & QB09 & QB05 &  & \\
QB09 & QB11 & QB07 &  & \\
QB10 & QB10 & QB06 &  & \\
QB11 & QB13.1 & QB09.1 & QB08 & QB10\\
QB12 &  & QB11 & QB10 & QB12\\
QB13 & QB12 & QB08 & QB07 & QB09\\
QB14 & QB14 & QB10 & QB09 & QB11\\
QB15 & QB16 & QB12 & QB11 & QB13\\
QB16 & QB04 &  &  & \\
QB17 & QB02 &  &  & \\
QB18 &  &  &  & QB05\\
QB19 & QB07 &  &  & \\
QB20 &  &  &  & QB08\\
QB21 & QB08 &  &  & \\
QB22 & QB1

## Response distribution

In [21]:
import plotly.plotly as py
from plotly.graph_objs import *
import plotly
from IPython.display import HTML
import colorlover as cl
import numpy as np

# Share of each distinct response value across the whole `data` matrix.
unique, counts = np.unique(data, return_counts=True)

# Normalise once with the array's own sum (the builtin sum() iterates the
# ndarray element by element and was evaluated twice).
fractions = counts / counts.sum()

# Keep the value -> fraction mapping under a name; as a bare expression in the
# middle of the cell it was computed and then silently discarded.
response_fractions = dict(zip(unique, fractions))

# NOTE(review): `py` is plotly.plotly, which newer plotly releases moved to the
# separate chart_studio package — confirm the installed plotly version.
py.iplot([Pie(values=fractions,
            labels=unique)])
