# JSON parsing and conversion

To learn about:
- deserializing a JSON file into a Python built-in type (eg, dict)
- converting Python built-ins into a Pandas DataFrame
- filtering a DataFrame: group by one column and keep, within each group, the rows that hold the maximum value of another column (see the fast_cars_by_years() function, which relies on the groupby transform method)
- converting a DataFrame back into a dictionary (to_dict() method)
- converting a Python builtin back into a JSON file and string

In [1]:
import json
import datetime
import pandas as pd

In [2]:
# Read a JSON file
def open_cat_json():
    """Load ``cat.json`` from the current working directory.

    Returns:
        The deserialized JSON document as Python built-in types (whatever
        ``json.load`` produces for the file's content).

    Raises:
        OSError: If ``cat.json`` cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # The context manager closes the file even when parsing fails.
    with open('cat.json') as f:
        return json.load(f)

# Display the parsed content of cat.json as the cell output
open_cat_json()

{'name': 'Fluffy',
 'activities': ['play', 'eat cat food'],
 'catFriends': [{'name': 'bar',
   'activities': ['be grumpy', 'eat bread omblet'],
   'weight': 8,
   'furcolor': 'white'},
  {'name': 'foo', 'activities': ['sleep', 'pre-sleep naps'], 'weight': 3}]}

In [3]:
def add_height_weight(height, weight):
    """Load ``cat.json`` and return its content with height and weight set.

    The ``"height"`` and ``"weight"`` keys are added to (or overwritten in)
    the top-level object. Only the in-memory copy is changed; the file on
    disk is not rewritten.

    Args:
        height: Value stored under the ``"height"`` key.
        weight: Value stored under the ``"weight"`` key.

    Returns:
        The deserialized document with the two keys set.

    Raises:
        OSError: If ``cat.json`` cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # The context manager closes the file even when parsing fails.
    with open("cat.json") as f:
        data = json.load(f)
    data["height"] = height
    data["weight"] = weight
    return data

# Example: show the cat record with height=3 and weight=5 set
add_height_weight(3, 5)

{'name': 'Fluffy',
 'activities': ['play', 'eat cat food'],
 'catFriends': [{'name': 'bar',
   'activities': ['be grumpy', 'eat bread omblet'],
   'weight': 8,
   'furcolor': 'white'},
  {'name': 'foo', 'activities': ['sleep', 'pre-sleep naps'], 'weight': 3}],
 'height': 3,
 'weight': 5}

In [4]:
# Note how the max() builtin function can be used to retrieve the key of the max value of a dictionary
def max_weight_friend():
    """Return the name of the heaviest entry in ``cat.json``'s ``catFriends``.

    Builds a ``{name: weight}`` mapping from the ``"catFriends"`` list and
    returns the key whose value is largest. If several friends share a name,
    the weight of the last one listed is the one compared.

    Returns:
        The ``"name"`` of the friend with the greatest ``"weight"``.

    Raises:
        OSError: If ``cat.json`` cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If ``"catFriends"`` is missing, or a friend lacks
            ``"name"`` or ``"weight"``.
        ValueError: If ``"catFriends"`` is empty.
    """
    with open("cat.json") as f:
        data = json.load(f)
    friends_weights = {friend["name"]: friend["weight"] for friend in data["catFriends"]}
    # max() iterates the dict's keys; key=dict.get ranks each key by its value.
    return max(friends_weights, key=friends_weights.get)

# Display the name of the heaviest cat friend
max_weight_friend()

'bar'

In [5]:
# Build a json file
def create_new_cat(cat_name="fluffy", activities_list=None, weight=10):
    """Write a new cat record to ``data.json`` and return it as a JSON string.

    Args:
        cat_name: Value stored under ``"name"``.
        activities_list: Value stored under ``"activities"``. ``None`` (the
            default) means an empty list; a fresh list is created on every
            call instead of sharing one mutable default object.
        weight: Value stored under ``"weight"``.

    Returns:
        The JSON text of the record, identical to what is written to
        ``data.json``.

    Raises:
        TypeError: If a value is not JSON serializable.
        OSError: If ``data.json`` cannot be opened for writing.
    """
    if activities_list is None:
        activities_list = []
    data = {"name": cat_name,
            "activities": activities_list,
            "weight": weight}
    # Serialize once, before opening the file, so a serialization error
    # does not leave a truncated data.json behind.
    serialized = json.dumps(data)
    with open("data.json", "w") as f:
        f.write(serialized)
    return serialized
# Write data.json with the default record and display the JSON string
create_new_cat()

'{"name": "fluffy", "activities": [], "weight": 10}'

In [6]:
def add_new_cat_friend(new_cat, friend_list):
    """Attach a friend list to a cat record and save the result.

    Reads the JSON document at ``new_cat``, sets its ``"catFriends"`` key to
    ``friend_list`` (replacing any existing value), and writes the updated
    document to ``data.json``.

    Args:
        new_cat: Path of the JSON file holding the cat record to read.
        friend_list: Value to store under ``"catFriends"``.

    Returns:
        The updated document as Python built-in types.

    Raises:
        OSError: If ``new_cat`` cannot be opened or ``data.json`` cannot be
            written.
        json.JSONDecodeError: If ``new_cat`` does not contain valid JSON.
        TypeError: If ``friend_list`` is not JSON serializable.
    """
    # The context manager closes the input file even when parsing fails.
    with open(new_cat) as f:
        data = json.load(f)
    data["catFriends"] = friend_list
    # NOTE: the output path is always data.json, regardless of new_cat.
    with open("data.json", "w") as f:
        json.dump(data, f)
    return data

# Sample friends to attach to the cat record
friend_list = [{"name": "Sparkle", "activities": ["reading", "eating"], "weight": 9}, 
               {"name": "Manny", "activities": ["skating", "hiking", "walking"], "weight": 6}]

# Read the record from data.json, attach the friends, and display the updated record
add_new_cat_friend("data.json", friend_list)

{'name': 'fluffy',
 'activities': [],
 'weight': 10,
 'catFriends': [{'name': 'Sparkle',
   'activities': ['reading', 'eating'],
   'weight': 9},
  {'name': 'Manny',
   'activities': ['skating', 'hiking', 'walking'],
   'weight': 6}]}

In [7]:
def open_car_json():
    """Load ``cars.json`` from the current working directory.

    Returns:
        The deserialized JSON document as Python built-in types.

    Raises:
        OSError: If ``cars.json`` cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # The context manager closes the file even when parsing fails.
    with open("cars.json") as f:
        return json.load(f)

# Load the car records and wrap them in a DataFrame
car_data = open_car_json()
df = pd.DataFrame(car_data)
# Preview up to the first 40 rows
df.head(40)

Unnamed: 0,Name,Miles_per_Gallon,Cylinders,Displacement,Horsepower,Weight_in_lbs,Acceleration,Year,Origin
0,chevrolet chevelle malibu,18.0,8,307.0,130.0,3504,12.0,1970-01-01,USA
1,buick skylark 320,15.0,8,350.0,165.0,3693,11.5,1970-01-01,USA
2,plymouth satellite,18.0,8,318.0,150.0,3436,11.0,1970-01-01,USA
3,amc rebel sst,16.0,8,304.0,150.0,3433,12.0,1970-01-01,USA
4,ford torino,17.0,8,302.0,140.0,3449,10.5,1970-01-01,USA
5,ford galaxie 500,15.0,8,429.0,198.0,4341,10.0,1970-01-01,USA
6,chevrolet impala,14.0,8,454.0,220.0,4354,9.0,1970-01-01,USA
7,plymouth fury iii,14.0,8,440.0,215.0,4312,8.5,1970-01-01,USA
8,pontiac catalina,14.0,8,455.0,225.0,4425,10.0,1970-01-01,USA
9,amc ambassador dpl,15.0,8,390.0,190.0,3850,8.5,1970-01-01,USA


In [8]:
# unique() collects the items of a Pandas Series without the repeats
# count() counts the number of non-NaN values in the specified DataFrame column or set of columns
def count_cars():
    """Count distinct and total car names in ``cars.json``.

    Returns:
        A 2-tuple ``(distinct, total)``: ``distinct`` is the number of
        different values in the ``"Name"`` column as reported by
        ``pd.unique``, and ``total`` is the number of non-missing entries in
        that column as reported by ``Series.count``.

    Raises:
        OSError: If ``cars.json`` cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If the data has no ``"Name"`` column.
    """
    # The context manager closes the file even when parsing fails.
    with open("cars.json") as f:
        data = json.load(f)
    names = pd.DataFrame(data)["Name"]
    return (len(pd.unique(names)), names.count())
# Display (number of distinct names, number of non-missing names)
count_cars()

(311, 406)

In [9]:
def count_by_country():
    """Count the cars in ``cars.json`` per value of the ``"Origin"`` column.

    Returns:
        A pandas Series indexed by ``Origin`` whose values are the number of
        non-missing ``"Name"`` entries in each group.

    Raises:
        OSError: If ``cars.json`` cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If the ``"Origin"`` or ``"Name"`` column is missing.
    """
    # The context manager closes the file even when parsing fails.
    with open("cars.json") as f:
        data = json.load(f)
    df = pd.DataFrame(data)
    return df.groupby("Origin")["Name"].count()

# Display the per-Origin count of car names
count_by_country()

Origin
Europe     73
Japan      79
USA       254
Name: Name, dtype: int64

In [10]:
def count_by_country2():
    """Count non-missing values per column for each ``"Origin"`` in ``cars.json``.

    Unlike ``count_by_country``, which counts a single column, this moves
    ``"Origin"`` into the index, groups on that index level, and counts every
    remaining column.

    Returns:
        A pandas DataFrame indexed by ``Origin`` with one column per remaining
        input column; each cell is the number of non-missing values.

    Raises:
        OSError: If ``cars.json`` cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If the ``"Origin"`` column is missing.
    """
    # The context manager closes the file even when parsing fails.
    with open("cars.json") as f:
        data = json.load(f)
    df = pd.DataFrame(data)
    return df.set_index("Origin").groupby(level=0).count()

# Display the per-Origin count of non-missing values for every column
count_by_country2()

Unnamed: 0_level_0,Name,Miles_per_Gallon,Cylinders,Displacement,Horsepower,Weight_in_lbs,Acceleration,Year
Origin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Europe,73,70,73,73,71,73,73,73
Japan,79,79,79,79,79,79,79,79
USA,254,249,254,254,250,254,254,254


In [11]:
def fast_cars_by_years():
    """Keep, for each year, the cars with the highest ``Acceleration`` value.

    Loads ``cars.json``, groups the rows by ``"Year"``, and keeps every row
    whose ``"Acceleration"`` equals the maximum of its group (ties are all
    kept). The surviving rows are converted to a list of lists, one inner
    list per row in column order, written to ``filtered_cars.json``, and
    returned as a JSON string.

    NOTE(review): this selects the largest ``Acceleration`` number. If that
    column holds a 0-to-60 time, the largest value is the slowest car —
    confirm the intended meaning of "fast".

    Returns:
        The JSON text of the filtered rows, identical to what is written to
        ``filtered_cars.json``.

    Raises:
        OSError: If ``cars.json`` cannot be read or ``filtered_cars.json``
            cannot be written.
        json.JSONDecodeError: If ``cars.json`` does not contain valid JSON.
        KeyError: If the ``"Year"`` or ``"Acceleration"`` column is missing.
    """
    # Load data from json; the context manager closes the file on any error.
    with open("cars.json") as f:
        data = json.load(f)

    # Convert data into DataFrame
    df = pd.DataFrame(data)

    # transform("max") broadcasts each year's maximum back onto every row of
    # that year, so it can be compared element-wise with the original column.
    # The string name is used because passing the builtin max is deprecated.
    yearly_max = df.groupby("Year")["Acceleration"].transform("max")
    filtered_df = df[df["Acceleration"] == yearly_max]

    # Convert the DataFrame rows into nested lists that JSON can serialize
    filtered_list = filtered_df.values.tolist()

    # Serialize once, then write the same text to disk
    serialized = json.dumps(filtered_list)
    with open("filtered_cars.json", "w") as f:
        f.write(serialized)

    # Output the json string
    return serialized

# Write filtered_cars.json and display the resulting JSON string
fast_cars_by_years()
    

'[["volkswagen 1131 deluxe sedan", 26.0, 4, 97.0, 46.0, 1835, 20.5, "1970-01-01", "Europe"], ["plymouth cricket", 26.0, 4, 91.0, 70.0, 1955, 20.5, "1971-01-01", "USA"], ["volkswagen type 3", 23.0, 4, 97.0, 54.0, 2254, 23.5, "1972-01-01", "Europe"], ["volkswagen super beetle", 26.0, 4, 97.0, 46.0, 1950, 21.0, "1973-01-01", "Europe"], ["toyota corolla 1200", 32.0, 4, 71.0, 65.0, 1836, 21.0, "1974-01-01", "Japan"], ["mercury monarch", 15.0, 6, 250.0, 72.0, 3432, 21.0, "1975-01-01", "USA"], ["buick century", 17.0, 6, 231.0, 110.0, 3907, 21.0, "1975-01-01", "USA"], ["chevrolet chevette", 29.0, 4, 85.0, 52.0, 2035, 22.2, "1976-01-01", "USA"], ["oldsmobile cutlass supreme", 17.0, 8, 260.0, 110.0, 4060, 19.0, "1977-01-01", "USA"], ["ford granada", 18.5, 6, 250.0, 98.0, 3525, 19.0, "1977-01-01", "USA"], ["volkswagen rabbit custom diesel", 43.1, 4, 90.0, 48.0, 1985, 21.5, "1978-01-01", "Europe"], ["peugeot 504", 27.2, 4, 141.0, 71.0, 3190, 24.8, "1979-01-01", "Europe"], ["vw dasher (diesel)", 43

In [12]:
def car_search(year, mpg):
    """Find cars at or after a year with at least a given fuel economy.

    Loads ``cars.json``, keeps the rows where ``"Year" >= year`` and
    ``"Miles_per_Gallon" >= mpg``, writes them to ``car_search.json``, and
    returns the same data as a JSON string. The result is a JSON object
    keyed by the row's original DataFrame index (as a string), each value
    being an object mapping column name to cell value.

    Args:
        year: Lower bound compared with the ``"Year"`` column using ``>=``.
            NOTE(review): the sample data shows ``Year`` as ISO date strings
            such as ``"1982-01-01"``, so a string like ``"1982"`` is
            presumably compared lexicographically — confirm.
        mpg: Lower bound compared with the ``"Miles_per_Gallon"`` column.

    Returns:
        The JSON text of the matching rows, identical to what is written to
        ``car_search.json``.

    Raises:
        OSError: If ``cars.json`` cannot be read or ``car_search.json``
            cannot be written.
        json.JSONDecodeError: If ``cars.json`` does not contain valid JSON.
        KeyError: If the ``"Year"`` or ``"Miles_per_Gallon"`` column is
            missing.
    """
    # Get the data from the JSON file; the context manager closes the file
    # on any error.
    with open("cars.json") as f:
        data = json.load(f)

    # Convert data into a DataFrame
    df = pd.DataFrame(data)

    # Search by Year and MPG
    matches = df[(df["Year"] >= year) & (df["Miles_per_Gallon"] >= mpg)]

    # Convert back into a serializable format (dict of row-index -> row dict)
    filtered_dict = matches.to_dict("index")

    # Serialize once, then write the same text to disk
    serialized = json.dumps(filtered_dict)
    with open("car_search.json", "w") as f:
        f.write(serialized)
    return serialized

# Example: cars with Year >= "1982" and at least 30 miles per gallon
car_search("1982", 30)

'{"349": {"Name": "plymouth reliant", "Miles_per_Gallon": 30.0, "Cylinders": 4, "Displacement": 135.0, "Horsepower": 84.0, "Weight_in_lbs": 2385, "Acceleration": 12.9, "Year": "1982-01-01", "Origin": "USA"}, "350": {"Name": "toyota starlet", "Miles_per_Gallon": 39.1, "Cylinders": 4, "Displacement": 79.0, "Horsepower": 58.0, "Weight_in_lbs": 1755, "Acceleration": 16.9, "Year": "1982-01-01", "Origin": "Japan"}, "351": {"Name": "plymouth champ", "Miles_per_Gallon": 39.0, "Cylinders": 4, "Displacement": 86.0, "Horsepower": 64.0, "Weight_in_lbs": 1875, "Acceleration": 16.4, "Year": "1982-01-01", "Origin": "USA"}, "352": {"Name": "honda civic 1300", "Miles_per_Gallon": 35.1, "Cylinders": 4, "Displacement": 81.0, "Horsepower": 60.0, "Weight_in_lbs": 1760, "Acceleration": 16.1, "Year": "1982-01-01", "Origin": "Japan"}, "353": {"Name": "subaru", "Miles_per_Gallon": 32.3, "Cylinders": 4, "Displacement": 97.0, "Horsepower": 67.0, "Weight_in_lbs": 2065, "Acceleration": 17.8, "Year": "1982-01-01", 