# Flattening JSON objects in Python

A blog post related to this notebook is available <a href="https://medium.com/amirziai/flattening-json-objects-in-python-f5343c794b10">here</a>
<br>
Written by Amir Ziai <a href="http://twitter.com/amirziai">@amirziai</a>

### Flattening with json_normalize

In [1]:
# Minimal nested record: one scalar field plus one nested dict, used to show
# how a flattener turns nesting into flat column names.
sample_object = {
    'Name': 'John',
    'Location': {
        'City': 'Los Angeles',
        'State': 'CA',
    },
}

In [2]:
# `pandas.io.json.json_normalize` has been deprecated since pandas 1.0; the
# supported location is the top-level `pandas.json_normalize`. Importing it
# under the same name keeps the later cells that call `json_normalize` working.
from pandas import json_normalize

# Nested dicts are expanded into dot-separated column names
# (e.g. `Location.City`, `Location.State`).
json_normalize(sample_object)

Unnamed: 0,Location.City,Location.State,Name
0,Los Angeles,CA,John


### Flattening an object with embedded arrays

In [10]:
# `pandas.io.json.json_normalize` has been deprecated since pandas 1.0; import
# it from the top-level package instead (same name, so later cells still work).
from pandas import json_normalize

# Sample record mixing every shape a flattener has to handle: a list of
# scalars, lists of dicts, nested dicts, and empty lists.
# NOTE: one key under `driverIncidentDetails` contains an embedded newline; it
# is written with an explicit `\n` escape because a single-quoted string
# literal cannot span two physical lines.
sample_object2 = {
    'driverPhoto': ['0', '1'],
    'driverVehicle': [
        {
            'Age of vehicle': '',
            'Registration number': '',
            'Load condition type': 'Normally Loaded',
            'Load condition': 'Passenger',
            '_localId': '7abb9ca1-42f1-4faf-bbab-92f6ea656eb9',
            'Type of vehicle': 'Auto rickshaw',
        },
        {
            'Age of vehicle': '',
            'Registration number': 'MH43BZ2759',
            'Load condition type': 'Normally Loaded',
            'Load condition': 'Passenger',
            '_localId': 'e6410b46-1de5-417d-9f45-b36d17a13251',
            'Type of vehicle': 'Motorised two wheeler',
        },
    ],
    'driverPeople': [
        {
            'Helmet': [],
            'Type of License': 'Unknown',
            'Seat Belt': [],
            'LAST NAME': '',
            'GENDER': 'Male',
            'AGE': '',
            'Vehicle': '7abb9ca1-42f1-4faf-bbab-92f6ea656eb9',
            'Involvment': ['Accused'],
            'MIDDLE NAME': '',
            'FIRST NAME': '',
            'Role': 'Driver',
            'ADDRESS': '',
            'INJURY': 'Not injured',
            'DRIVING LICENSE NUMBER': '',
            '_localId': 'f4c78ea9-9b01-4a05-a146-95c565f5bf4a',
        },
        {
            'Helmet': ['Unknown'],
            'Type of License': 'Unknown',
            'Seat Belt': [],
            'LAST NAME': 'Vakode',
            'GENDER': 'Male',
            'AGE': '54',
            'Vehicle': 'e6410b46-1de5-417d-9f45-b36d17a13251',
            'Involvment': ['Victim'],
            'MIDDLE NAME': 'Maruti',
            'FIRST NAME': 'Ashok',
            'Role': 'Driver',
            'ADDRESS': '',
            'INJURY': 'Seriously Injured',
            'DRIVING LICENSE NUMBER': '',
            '_localId': 'cb3ed15a-0b16-4ef4-a870-f942df03c2ce',
        },
    ],
    'driverRoadRelatedDetail': {
        'Crash Spot': 'Market/Commercial area',
        'Road Features': 'Straight Road',
        'Road Number': '',
        'Road Surface condition': 'Paved',
        'Chainage': '',
        'Road Name': 'Goregaon Mulund Link Road',
        'Number of lanes': '2',
        '_localId': 'a152857e-382f-4547-8745-c84bf5118445',
        'Landmark': 'in front of Cement Company, Airoli Bound, Mulund east, Mumbai',
        'Crash Spot - Others (specify)': '',
        'Type of road': 'District Road',
    },
    'driverIncidentDetails': {
        'Hit & Run': 'Yes',
        'Police Station': 'Navghar',
        'District': 'Mumbai',
        'Type of Collision - Others (specify)': '',
        'Crash Severity': 'Serious Injury',
        'Type of Collision': 'Vehicle to Vehicle',
        'State': 'Maharashtra',
        'Charges against - Other  (please specify)': '',
        'Crime Registration \nNumber': '195/2019',
        'Record Type': 'Production',
        '_localId': 'ef014f5f-6373-46c6-9d4d-0811b50d2b0d',
        'Charges against': ['134a', '134b', '279', '338'],
        'Type of Area': 'Urban',
    },
}

In [11]:
# Nested dicts become dot-separated columns, but list values (e.g. `driverVehicle`,
# `driverIncidentDetails.Charges against`) stay as a single cell holding the whole
# list -- json_normalize alone does not flatten embedded arrays.
json_normalize(sample_object2)

Unnamed: 0,driverIncidentDetails.Charges against,driverIncidentDetails.Charges against - Other (please specify),driverIncidentDetails.Crash Severity,driverIncidentDetails.Crime Registration Number,driverIncidentDetails.District,driverIncidentDetails.Hit & Run,driverIncidentDetails.Police Station,driverIncidentDetails.Record Type,driverIncidentDetails.State,driverIncidentDetails.Type of Area,...,driverRoadRelatedDetail.Crash Spot - Others (specify),driverRoadRelatedDetail.Landmark,driverRoadRelatedDetail.Number of lanes,driverRoadRelatedDetail.Road Features,driverRoadRelatedDetail.Road Name,driverRoadRelatedDetail.Road Number,driverRoadRelatedDetail.Road Surface condition,driverRoadRelatedDetail.Type of road,driverRoadRelatedDetail._localId,driverVehicle
0,"[134a, 134b, 279, 338]",,Serious Injury,195/2019,Mumbai,Yes,Navghar,Production,Maharashtra,Urban,...,,"in front of Cement Company, Airoli Bound, Mulu...",2,Straight Road,Goregaon Mulund Link Road,,Paved,District Road,a152857e-382f-4547-8745-c84bf5118445,"[{'Age of vehicle': '', 'Registration number':..."


### Using a recursive function to flatten an object of arbitrary structure

In [12]:
def flatten_json(y, sep='_'):
    """Flatten an arbitrarily nested JSON-like object into a one-level dict.

    Dict keys and list indices along the path to each leaf are joined with
    ``sep`` to form the flat key, e.g. ``{'a': [{'b': 1}]}`` becomes
    ``{'a_0_b': '1'}``.

    Args:
        y: The object to flatten: any nesting of dicts and lists (subclasses
            such as ``OrderedDict`` included) with scalar leaves.
        sep: Separator placed between path components. Defaults to ``'_'``.

    Returns:
        dict: Flat mapping of path string -> ``str(leaf)``. Every leaf value
        is converted to ``str``. Empty dicts and empty lists contribute no
        entries. A scalar passed at the top level is stored under ``''``.
    """
    out = {}

    def flatten(x, name=''):
        # isinstance (rather than an exact type check) so dict/list subclasses
        # are descended into instead of being stringified as a single leaf.
        if isinstance(x, dict):
            for key, value in x.items():
                # str(key): Python dicts may carry non-string keys (ints, ...).
                flatten(value, name + str(key) + sep)
        elif isinstance(x, list):
            for index, item in enumerate(x):
                flatten(item, name + str(index) + sep)
        else:
            # `name` always ends with one trailing separator (or is empty for
            # a top-level scalar); strip it to obtain the final key.
            key = name[:-len(sep)] if sep else name
            out[key] = str(x)

    flatten(y)
    return out

In [13]:
# Flatten the nested record into a single-level dict; dict keys and list
# positions are joined into the flat key (e.g. `driverPhoto_0`,
# `driverVehicle_1_Type of vehicle`).
flat = flatten_json(sample_object2)
flat

{'driverPhoto_0': '0',
 'driverPhoto_1': '1',
 'driverVehicle_0_Age of vehicle': '',
 'driverVehicle_0_Registration number': '',
 'driverVehicle_0_Load condition type': 'Normally Loaded',
 'driverVehicle_0_Load condition': 'Passenger',
 'driverVehicle_0__localId': '7abb9ca1-42f1-4faf-bbab-92f6ea656eb9',
 'driverVehicle_0_Type of vehicle': 'Auto rickshaw',
 'driverVehicle_1_Age of vehicle': '',
 'driverVehicle_1_Registration number': 'MH43BZ2759',
 'driverVehicle_1_Load condition type': 'Normally Loaded',
 'driverVehicle_1_Load condition': 'Passenger',
 'driverVehicle_1__localId': 'e6410b46-1de5-417d-9f45-b36d17a13251',
 'driverVehicle_1_Type of vehicle': 'Motorised two wheeler',
 'driverPeople_0_Type of License': 'Unknown',
 'driverPeople_0_LAST NAME': '',
 'driverPeople_0_GENDER': 'Male',
 'driverPeople_0_AGE': '',
 'driverPeople_0_Vehicle': '7abb9ca1-42f1-4faf-bbab-92f6ea656eb9',
 'driverPeople_0_Involvment_0': 'Accused',
 'driverPeople_0_MIDDLE NAME': '',
 'driverPeople_0_FIRST NAME

In [8]:
# `flat` is already a one-level dict, so this should produce a single row with
# one column per flattened key.
# NOTE(review): the saved output under this cell lists `Hobbies_*` / `Location_*`
# columns that do not come from `sample_object2`, and this cell's execution
# count (8) is lower than the cells above it -- the output looks stale. Restart
# the kernel and run all cells to refresh it.
json_normalize(flat)

Unnamed: 0,Hobbies_0,Hobbies_1,Hobbies_2,Location_City,Location_State,Name
0,Running,Music,Programming,Los Angeles,CA,John
