# Nutrition, Physical Activity, and Obesity - Behavioral Risk Factor Surveillance System

The data used here is from Data.gov, managed and hosted by the U.S. General Services Administration, Technology Transformation Service (https://catalog.data.gov)

The dataset includes data on adult's diet, physical activity, and weight status from Behavioral Risk Factor Surveillance System. This data is used for DNPAO's Data, ...

## Setup

First we need to import the some libraries

In [1]:
import json
import sys

import matplotlib as mpl
import matplotlib.pyplot as plt
import networkx as nx

import urllib.request
from xnd import xnd

In [2]:
def get_key_value(item):
    if isinstance(item, dict):
        return item.items()
    elif isinstance(item, list):
        return enumerate(item)
    return ()

In [3]:
def test():
    test_data = {
        'tree': {
            'leaf1': [1, 2],
            'leaf2': [2, 4]}, 
        'cars': [
            {'model': 'model1', 'color': 'red', 'year': 2010, 'ports': None},
            {'model': 'model2', 'color': 'blue', 'year': None},
            {'ports': {'numbers': 4}}
        ]
    }
    return json_meta_type(test_data)

In [4]:
def highest_type(values):
    """
    TODO: implement a way to treat "type weight"
    
    values: list
    returns: type
    """
    _types = {type(v) for v in values if v is not None}
    return _types.pop()

highest_type([1,3,4,5, None])    

int

In [15]:
def json_meta_type(data):
    """The function should return a json with all meta data normalized."""
    meta = {}

    for k, item in get_key_value(data):
        meta[k] = {'value_type': item.__class__}
        meta_k = meta[k]  # alias
        if meta_k['value_type'] is type(None):
            meta_k['optional'] = True
        
        if isinstance(item, dict):
            meta_k['value'] = json_meta_type(item)
        elif isinstance(item, list):
            meta_k['counts'] = len(item)
            _value_highest_type = highest_type(item)
            
            if _value_highest_type is dict:
                meta_k['value'] = {}
                meta_k_value = meta_k['value']  # alias
            
            for v in item:
                if _value_highest_type != type(v):
                    raise Exception('All items from the list should be the same type.')

                if isinstance(v, dict):
                    children_value = json_meta_type(v)
                    
                    for _k, _v in children_value.items():
                        if (
                            _k not in meta_k_value or 
                            not _v['value_type'] is type(None)
                        ):
                            meta_k_value[_k] = _v
                        if _v['value_type'] is type(None):
                            meta_k_value[_k]['optional'] = True
                    
            if 'value' not in meta_k:
                meta_k['value'] = _value_highest_type
    return meta

test()

{'tree': {'value_type': dict,
  'value': {'leaf1': {'value_type': list, 'counts': 2, 'value': int},
   'leaf2': {'value_type': list, 'counts': 2, 'value': int}}},
 'cars': {'value_type': list,
  'counts': 3,
  'value': {'model': {'value_type': str},
   'color': {'value_type': str},
   'year': {'value_type': int, 'optional': True},
   'ports': {'value_type': dict, 'value': {'numbers': {'value_type': int}}}}}}

In [6]:
test_type = '''
    {tree : {leaf1 : 2 * int64, leaf2 : 2 * int64}, 
     cars : 3 * {model : ?string, color : ?string, year : ?int64, 
                 ports : ?{numbers : ?int64}}}
'''

test_data = {
    'tree': {
        'leaf1': [1, 2],
        'leaf2': [2, 4]}, 
    'cars': [
        {'model': 'model1', 'color': 'red', 'year': 2010, 'ports': None},
        {'model': 'model2', 'color': 'blue', 'year': None},
        {'ports': {'numbers': 4}}
    ]
}
json_meta_type(test_data)

{'tree': {'value_type': dict,
  'value': {'leaf1': {'value_type': list, 'counts': 2, 'value': int},
   'leaf2': {'value_type': list, 'counts': 2, 'value': int}}},
 'cars': {'value_type': list,
  'counts': 3,
  'value': {'model': {'value_type': str},
   'color': {'value_type': str},
   'year': {'value_type': int},
   'ports': {'value_type': dict, 'value': {'numbers': {'value_type': int}}}}}}

In [7]:
test_type = '''
    {tree : {leaf1 : 2 * int64, leaf2 : 2 * int64}, 
     cars : 3 * {model : ?string, color : ?string, year : ?int64, 
                 ports : ?{numbers : ?int64}}}
'''

test_data = {
    'tree': {
        'leaf1': [1, 2],
        'leaf2': [2, 4]}, 
    'cars': [
        {'model': 'model1', 'color': 'red', 'year': 2010, 'ports': None},
        {'model': 'model2', 'color': 'blue', 'year': None, 'ports': {'numbers': None}},
        {'model': None, 'color': None, 'year': None, 'ports': {'numbers': 4}}
    ]
}
xnd(test_data, type=test_type)

xnd({'tree': {'leaf1': [1, 2], 'leaf2': [2, 4]},
     'cars': [{'model': 'model1', 'color': 'red', 'year': 2010, 'ports': None},
      {'model': 'model2', 'color': 'blue', 'year': None, 'ports': {'numbers': None}},
      {'model': None, 'color': None, 'year': None, 'ports': {'numbers': 4}}]},
    type='{tree : {leaf1 : 2 * int64, leaf2 : 2 * int64}, cars : 3 * {model : ?string, color : ?string, year : ?int64, ports : ?{numbers : ?int64}}}')

## Loading Data

The data for this tutorial could be downloaded at https://chronicdata.cdc.gov/views/hn4x-zwk7/rows.json?accessType=DOWNLOAD

We already have this data at `data/data-gov-nutrition.json`

In [8]:
with open('data/data-gov-nutrition.json') as f:
    data = json.load(f)    

In [9]:
str_summary = str(data)[:500] + '...'
print(str_summary.replace(', ', ',\n'))

{'meta': {'view': {'id': 'hn4x-zwk7',
'name': 'Nutrition,
Physical Activity,
and Obesity - Behavioral Risk Factor Surveillance System',
'attribution': 'Centers for Disease Control and Prevention (CDC),
National Center for Chronic Disease Prevention and Health Promotion,
Division of Nutrition,
Physical Activity,
and Obesity',
'attributionLink': 'http://www.cdc.gov/nccdphp/DNPAO/index.html',
'averageRating': 0,
'category': 'Nutrition,
Physical Activity,
and Obesity',
'createdAt': 1469123618,
'desc...


In [10]:
xnd

xnd.xnd

In [11]:
normalize_json_meta(data)

NameError: name 'normalize_json_meta' is not defined

In [None]:
data_norm == data

In [None]:
def walk(G, node):
    if isinstance(node, dict):
        for key, item in node.items():
            if isinstance(item, dict):
                for j in item.keys():
                    G.add_edge(key, j)
                walk(G, item)
    if isinstance(node, list):
        walk(G, node[0])
    return
            
def gen_graph(data):
    G = nx.Graph()
    
    for key in data.keys():
        G.add_edge('ROOT', key)

    walk(G, data)
    
    return G

In [None]:
G = gen_graph(data)

In [None]:
plt.figure(figsize=(10, 15))
nx.draw(
    G, with_labels=True, node_color="#dfdfdf", font_size=10,  
    pos=nx.spring_layout(G, scale=.5, k=0.5)
)

In [None]:
xnd(data)