# Nutrition, Physical Activity, and Obesity - Behavioral Risk Factor Surveillance System

The data used here is from Data.gov, managed and hosted by the U.S. General Services Administration, Technology Transformation Service (https://catalog.data.gov)

The dataset includes data on adults' diet, physical activity, and weight status from the Behavioral Risk Factor Surveillance System. This data is used for DNPAO's Data, ...

## Setup

First, we need to import some libraries.

In [1]:
import json
import sys
from importnb import Notebook
from IPython.display import display

In [2]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import networkx as nx

In [3]:
from xnd import xnd

In [4]:
# Put the local `utils` directory at the front of the module search path.
sys.path.insert(0, 'utils')
# IPython shell capture: `_` receives the output lines of `pwd`.
_ = !pwd
# Set __file__ to the first output line, i.e. the current working directory.
# NOTE(review): presumably importnb needs __file__ to be defined — confirm.
__file__ =_[0]

In [5]:
# Import the JSON helper functions inside importnb's `Notebook` context.
# NOTE(review): presumably `json_processing` is a notebook under `utils/`
# (that directory was added to sys.path earlier) — confirm.
with Notebook(): 
    from json_processing import (
        get_xnd_type, json_cleaning, json_meta_type, json_normalize
    )

In [6]:
def walk(G, node):
    """Add key -> child-key edges to ``G`` for the nested dicts in ``node``.

    For a dict, every value that is itself a dict contributes one edge from
    its key to each of its own keys, and is then walked recursively. For a
    non-empty list, only the first element is walked. Lists stored as dict
    values are not descended into, and any other value is ignored.

    Args:
        G: Graph-like object exposing ``add_edge(u, v)``.
        node: Parsed JSON fragment (dict, list, or scalar).

    Returns:
        None. ``G`` is mutated in place.
    """
    if isinstance(node, dict):
        for key, item in node.items():
            if isinstance(item, dict):
                for child in item:
                    G.add_edge(key, child)
                walk(G, item)
    elif isinstance(node, list) and node:
        # Guard against empty lists: indexing [0] on one raises IndexError.
        walk(G, node[0])
            
def gen_graph(data):
    """Build an undirected graph of the key hierarchy found in ``data``.

    Every top-level key is linked to a synthetic ``'ROOT'`` node; nested
    dict keys are then linked to their parent key by ``walk``. When ``data``
    is a list, its first element is used in its place, which is how ``walk``
    treats lists.

    Args:
        data: A dict, or a (possibly nested) list whose first element is a
            dict.

    Returns:
        A ``networkx.Graph``. It is empty when ``data`` is an empty list.
    """
    G = nx.Graph()

    # Peel list wrappers so a list of records works as well as a single dict.
    record = data
    while isinstance(record, list):
        if not record:
            # Nothing to inspect; return the empty graph.
            return G
        record = record[0]

    for key in record.keys():
        G.add_edge('ROOT', key)

    walk(G, record)

    return G

In [7]:
test_data = {
    'tree': {
        'leaf1': [1, 2],
        'leaf2': [2, 4]}, 
    'cars': [
        {'model': 'model1', 'year': 2010},
        {'model': 'model2', 'color': 'blue'},
        {'ports': {'numbers': 4}}
    ]
}

In [8]:
test_data_clean = json_cleaning(test_data)
test_data_clean

{'tree': {'leaf1': [1, 2], 'leaf2': [2, 4]},
 'cars': [{'model': 'model1', 'year': 2010},
  {'model': 'model2', 'color': 'blue'},
  {'ports': {'numbers': 4}}]}

In [9]:
test_data_norm = json_normalize(test_data_clean)
test_data_norm

{'tree': {'leaf1': [1, 2], 'leaf2': [2, 4]},
 'cars': [{'model': 'model1',
   'year': 2010,
   'color': None,
   'ports': {'numbers': None}},
  {'model': 'model2',
   'color': 'blue',
   'year': None,
   'ports': {'numbers': None}},
  {'ports': {'numbers': 4}, 'model': None, 'year': None, 'color': None}]}

In [10]:
meta_type = json_meta_type(test_data_norm, True)
meta_type

{'tree': {'value_type': dict,
  'nullable': True,
  'value': {'leaf1': {'value_type': list,
    'nullable': True,
    'count': 2,
    'value': int},
   'leaf2': {'value_type': list, 'nullable': True, 'count': 2, 'value': int}}},
 'cars': {'value_type': dict,
  'nullable': True,
  'count': 3,
  'value': {'model': {'nullable': True, 'value_type': str},
   'year': {'nullable': True, 'value_type': int},
   'color': {'nullable': True, 'value_type': str},
   'ports': {'nullable': True,
    'value_type': dict,
    'value': {'numbers': {'value_type': int, 'nullable': True}}}}}}

In [11]:
test_type = get_xnd_type(meta_type)
test_type

'{tree: ?{leaf1: 2 * ?int64, leaf2: 2 * ?int64}, cars: 3 * ?{model: ?string, year: ?int64, color: ?string, ports: ?{numbers: ?int64}}}'

In [12]:
xnd(test_data_norm, type=test_type)

xnd({'tree': {'leaf1': [1, 2], 'leaf2': [2, 4]},
     'cars': [{'model': 'model1', 'year': 2010, 'color': None, 'ports': {'numbers': None}},
      {'model': 'model2', 'year': None, 'color': 'blue', 'ports': {'numbers': None}},
      {'model': None, 'year': None, 'color': None, 'ports': {'numbers': 4}}]},
    type='{tree : ?{leaf1 : 2 * ?int64, leaf2 : 2 * ?int64}, cars : 3 * ?{model : ?string, year : ?int64, color : ?string, ports : ?{numbers : ?int64}}}')

## Loading Data

The data for this tutorial can be downloaded from https://chronicdata.cdc.gov/views/hn4x-zwk7/rows.json?accessType=DOWNLOAD

We already have this data at `data/data-gov-nutrition.json`

In [13]:
# Load the input file, parsing each line as a separate JSON document.
# NOTE(review): the text above names `data/data-gov-nutrition.json`, while this
# cell reads `data/githubarchive.json` — confirm which file is intended.
with open('data/githubarchive.json', encoding='utf-8') as f:
    # Iterate the file object directly rather than materializing every line
    # with readlines(); the enumerate index was never used.
    data = [json.loads(line) for line in f]

In [14]:
# Try to build an xnd container from the raw records without an explicit type.
# Any failure is displayed instead of raised so the notebook keeps running.
try:
    xdata = xnd(data)
except Exception as e:
    display(e)

ValueError('dtype mismatch: have {id: string, type: string, actor: {id: int64, login: string, gravatar_id: string, url: string, avatar_url: string}, repo: {id: int64, name: string, url: string}, payload: {ref: string, ref_type: string, master_branch: string, description: string, pusher_type: string}, public: int64, created_at: string} and {id: string, type: string, actor: {id: int64, login: string, gravatar_id: string, url: string, avatar_url: string}, repo: {id: int64, name: string, url: string}, payload: {push_id: int64, size: int64, distinct_size: int64, ref: string, head: string, before: string, commits: 1 * {sha: string, author: {email: string, name: string}, message: string, distinct: int64, url: string}}, public: int64, created_at: string}')

In [15]:
data_clean = json_cleaning(data)
data_norm = json_normalize(data_clean)

In [16]:
meta = json_meta_type(data_norm, True)
meta

{'nullable': True,
 'count': 11351,
 'value_type': dict,
 'value': {'id': {'nullable': True, 'value_type': str},
  'type': {'nullable': True, 'value_type': str},
  'actor': {'nullable': True,
   'value_type': dict,
   'value': {'id': {'value_type': int, 'nullable': True},
    'login': {'value_type': str, 'nullable': True},
    'gravatar_id': {'value_type': str, 'nullable': True},
    'url': {'value_type': str, 'nullable': True},
    'avatar_url': {'value_type': str, 'nullable': True}}},
  'repo': {'nullable': True,
   'value_type': dict,
   'value': {'id': {'value_type': int, 'nullable': True},
    'name': {'value_type': str, 'nullable': True},
    'url': {'value_type': str, 'nullable': True}}},
  'payload': {'nullable': True,
   'value_type': dict,
   'value': {'action': {'value_type': str, 'nullable': True},
    'issue': {'value_type': dict,
     'nullable': True,
     'value': {'url': {'value_type': str, 'nullable': True},
      'labels_url': {'value_type': str, 'nullable': True},
 

In [17]:
xnd_type = get_xnd_type(meta)
print(xnd_type.replace(',', ',\n'))

KeyError: 'value'

In [None]:
data_norm = json_normalize(data)

In [None]:
G = gen_graph(data_norm)

In [None]:
plt.figure(figsize=(10, 15))
nx.draw(
    G, with_labels=True, node_color="#dfdfdf", font_size=10,  
    pos=nx.spring_layout(G, scale=.5, k=0.5)
)

In [None]:
xnd(data_norm, type=xnd_type)

In [None]:
%load_ext version_information
%version_information xnd, importnb, matplotlib