# Read JSON Format Data

JSON stands for JavaScript Object Notation (although it is not JavaScript!) and is a data format similar to Python dictionaries.

### Creating JSON Data

In [None]:
import json

In [None]:
#create data
#build a sample dictionary to serialize as JSON
#every pet record uses the same keys, and "hobbies" is always a list
#(even when there is only one hobby) so all records can be handled the same way

data = {
    "pets": [
        {
            "name": 'Goku',
            "type": 'dog',
            "age": 4,
            "fav_food": 'bone',
            "hobbies": ['whining', 'licking butts']
        },
        {
            "name": 'Mika',
            "type": 'dog',
            "age": 5,
            "fav_food": 'bacon',
            "hobbies": ['relaxing', 'ripping toys']
        },
        {
            "name": 'Brunson',
            "type": 'cat',
            "age": 5,
            "fav_food": 'undecided',
            "hobbies": ['being outside']
        },
        {
            "name": 'Tiny',
            "type": 'dog',
            "age": 4,
            "fav_food": 'steak',
            "hobbies": ['barking', 'sitting outside']
        }
    ]
}

In [None]:
#write data to JSON file
#json.dump serializes the data dictionary and writes it to petdata.json
#mode 'w' creates the file, or overwrites it if it already exists
with open('petdata.json', 'w') as write_file:
    json.dump(data, write_file)

In [None]:
#alternatively, serialize the data to a Python string with json.dumps
#this lets you work with the JSON text in memory instead of going through a file
pet_string = json.dumps(data)
pet_string

In [None]:
#let's make it look more readable
#indent=4 puts each nested level on its own indented line;
#print() is used so the line breaks in the string are rendered
pet_string = json.dumps(data, indent=4)
print(pet_string)

### Load in JSON Data

In [None]:
#bring back data from petdata.json file
#json.load parses the contents of the open file into Python objects
with open("petdata.json", 'r') as read_file:
    data = json.load(read_file)
    
data

In [None]:
#or load from in-memory string
#json.loads parses the JSON text held in pet_string instead of reading a file
data = json.loads(pet_string)
data

## NBA JSON Data

This dataset was collected from Sports Reference LLC and contains NBA games played from 1985 to 2013.

Data source: https://data.mendeley.com/datasets/ct8f9skv97/1

In [None]:
import pandas as pd
import json

In [None]:
%%bash
# use the bash head command to see a little bit of the JSON file
# and look at how the data is structured
# NOTE: the %%bash cell magic must be the very first line of the cell,
# so these comments sit below it (as bash comments) rather than above it

head datasets/nbagames.json

In [None]:
#relative path to the NBA games JSON file that is loaded below
filepath = "datasets/nbagames.json"

In [1]:
#data is individual JSON objects, one per line, not separated by commas,
#so the file cannot be parsed with a single json.load() call
#parse each line on its own and collect the results in one list
#to be able to extract values from keys

with open(filepath) as file:
    #skip blank lines (e.g. a trailing empty line), which json.loads would reject
    data = [json.loads(line) for line in file if line.strip()]

NameError: name 'filepath' is not defined

In [None]:
#verify that data is a list type (each element is one parsed JSON object)
type(data)

In [None]:
#check number of items in data list
#each item is one game, so this is the total number of games in the dataset
len(data)

#### Verify values within the data structure

In [None]:
#each index level is a basketball game
#look at the first game to see which keys are available
data[0]

In [None]:
#teams within each basketball game are stored under the 'teams' key
data[0]['teams']

In [None]:
#get the date of a game
#the date value is nested under a '$date' key inside 'date'
data[0]['date']['$date']

In [None]:
#get a team's city abbreviation (first team listed for the first game)
data[0]['teams'][0]['abbreviation']

In [None]:
#get a player's name (first player listed for the first team of the first game)
data[0]['teams'][0]['players'][0]['player']

In [None]:
#initialize variables for empty lists to hold data
#each list will become one column of the final dataframe,
#with one entry per team per game

datels = [] #date of game
abrvls = [] #city abbreviation
cityls = [] #name of city
homels = []   #T/F if home game
namels = [] #team name
playersls = [] #list of players that played in the game
scorels = []   #final score for game
wonls = []  #0/1 if won

In [None]:
#fill lists with data

#empty the lists first so that re-running this cell does not append
#every game a second time
for column in (datels, abrvls, cityls, homels, namels, playersls, scorels, wonls):
    column.clear()

for game in data:  #data[index]

    game_date = game['date']['$date']

    for team in game['teams']: #data[index]['teams'][index]
        #one row per team: the date is added inside the team loop so every
        #list stays the same length however many teams a game record lists
        datels.append(game_date)
        abrvls.append(team['abbreviation'])
        cityls.append(team['city'])
        homels.append(team['home'])
        namels.append(team['name'])
        scorels.append(team['score'])
        wonls.append(team['won'])

        #names of the players listed for this team  #data[index]['teams'][index]['players'][index]
        members = [player['player'] for player in team['players']]

        playersls.append(members)

In [None]:
#verify the number of entries in the date list
len(datels)

In [None]:
#see first 20 values within date list
#each game's date is stored once per team, so dates are expected to repeat
datels[:20]

In [None]:
#check that players is a list of lists (one inner list of names per team per game)
#first 5 items in players list
playersls[:5]

In [None]:
#confirm that every list holds the same number of entries
#one length is printed per line, in the order the lists appear below

print(
    *map(len, (
        datels,     #date of game
        abrvls,     #city abbreviation
        cityls,     #name of city
        homels,     #T/F if home game
        namels,     #team name
        playersls,  #list of players that played in the game
        scorels,    #final score for game
        wonls,      #0/1 if won
    )),
    sep='\n',
)

#### Make gathered information into dataframe

In [None]:
#column names, listed in the order I want the columns to appear
names = ['date', 'team_name', 'abbrv', 'city', 'home_game', 'score', 'won_game', 'players']

#combine the lists row by row, in the same order as the column names
NBAlist = [
    *zip(
        datels,
        namels,
        abrvls,
        cityls,
        homels,
        scorels,
        wonls,
        playersls,
    )
]

In [None]:
#make the dataframe
#each tuple in NBAlist becomes one row and names supplies the column labels
df = pd.DataFrame(NBAlist, columns=names)

#preview the first rows to confirm the columns line up
df.head()