# Short Introduction to JSON

In [11]:
#https://www.w3schools.com/python/python_json.asp
import json
import pandas as pd

(Note: API responses are usually returned in JSON format. pandas is used here to help wrangle the JSON data.)

In [12]:
"""
Reading JSON with pd.read_json
Docs: https://pandas.pydata.org/docs/reference/api/pandas.read_json.html

How iris.json is laid out:
- the top level is an array ([])
- every element is an object ({}) of key-value pairs holding a single record
"""

# Load the file with pandas and preview the first five rows
pd.read_json(path_or_buf='iris.json').head(n=5)

Unnamed: 0,sepalLength,sepalWidth,petalLength,petalWidth,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [13]:
# Confirm which type of object pd.read_json hands back for iris.json
type(pd.read_json(path_or_buf='iris.json'))

pandas.core.frame.DataFrame

In [14]:
# pd.read_json works when the file maps cleanly onto a table.
# food.json is semi-structured, so reading it directly fails with
# ValueError: Mixing dicts with non-Series may lead to ambiguous ordering.
#
# The error is the point of this cell, so it is caught and printed rather than
# left to propagate: "Restart Kernel -> Run All" then continues past this cell
# instead of stopping here.
try:
    pd.read_json('food.json').head()
except ValueError as err:
    # Same text as the uncaught exception would show, minus the traceback
    print(f'{type(err).__name__}: {err}')

ValueError: Mixing dicts with non-Series may lead to ambiguous ordering.

In [None]:
'''
Using
Python json.load first and then pd.DataFrame or pd.json_normalize

Structure of food.json
- dictionary with nested dictionaries and lists
'''
# The file handle gets its own name so that `data` only ever refers to the
# decoded dictionary (previously the same name was used for both).
# The encoding is stated explicitly instead of relying on the platform default.
with open('food.json', encoding='utf-8') as food_file:
    data = json.load(food_file)  # Decode the JSON document into a Python dict

In [None]:
# Check which Python type the decoded JSON document has
type(data)

In [None]:
# Display the decoded JSON to find where the data of interest is located.
# NOTE(review): this echoes the whole decoded payload, which can be very long;
# consider previewing only part of it (e.g. the top-level keys) — confirm intent.
data  

In [15]:
# "results" is the key; the values stored under it are the records we are
# interested in (a list with one dict per record).
# Show only the first record: it is enough to see which fields are available
# without flooding the notebook with the entire list.
data['results'][:1]

[{'status': 'Ongoing',
  'city': 'Seattle',
  'state': 'WA',
  'country': 'United States',
  'classification': 'Class I',
  'openfda': {},
  'product_type': 'Food',
  'event_id': '92830',
  'recalling_firm': 'David Oppenheimer & Company I LLC',
  'address_1': '180 Nickerson St Ste 211',
  'address_2': '',
  'postal_code': '98109-1631',
  'voluntary_mandated': 'Voluntary: Firm initiated',
  'initial_firm_notification': 'Two or more of the following: Email, Fax, Letter, Press Release, Telephone, Visit',
  'distribution_pattern': 'Distributed in FL, GA, IL, IN, KY, MI, NC, NY, OH, PA, TN, TX, VA, and WI.',
  'recall_number': 'F-1495-2023',
  'product_description': 'Zespri Organic Green Kiwi, product of New Zealand, one pallet of bulk loose organic green kiwifruit was shipped to one consignee in Chicago, IL. The master carton has the barcode 067834 and the kiwi PLU label has the code GTIN 9400 9552.',
  'product_quantity': '1 pallet',
  'reason_for_recall': 'Listeria monocytogenes',
  'rec

In [16]:
# Option 1: hand the list stored under "results" (one dict per record)
# straight to the DataFrame constructor, then preview the first three rows.
pd.DataFrame(data=data['results']).head(n=3)

Unnamed: 0,status,city,state,country,classification,openfda,product_type,event_id,recalling_firm,address_1,...,recall_number,product_description,product_quantity,reason_for_recall,recall_initiation_date,center_classification_date,report_date,code_info,more_code_info,termination_date
0,Ongoing,Seattle,WA,United States,Class I,{},Food,92830,David Oppenheimer & Company I LLC,180 Nickerson St Ste 211,...,F-1495-2023,"Zespri Organic Green Kiwi, product of New Zeal...",1 pallet,Listeria monocytogenes,20230808,20230908,20230920,none,,
1,Terminated,Dayton,OH,United States,Class II,{},Food,92888,Cassanos Inc,1700 E Stroop Rd,...,F-1510-2023,Cassano's The Pizza King Premium Pizza Dough o...,180,Undeclared Allergen - Soy Flour,20230821,20230913,20230920,"Lot: 193, 194 Expiration: 01/10/24, 01/11/24 I...",,20230915.0
2,Ongoing,Englewood Cliffs,NJ,United States,Class II,{},Food,92853,Conopco DBA Unilever,700 Sylvan Ave,...,F-1516-2023,Knorr Letras con Tomate Tomato Based Alphabet ...,"230,000 cases",Undeclared egg,20230810,20230913,20230920,"Best buy dates prior to and including July 6, ...",,


In [17]:
# Option 2: pass the whole decoded document to pd.json_normalize and use
# record_path to name the key that holds the list of records.
# Preview the first five rows.
pd.json_normalize(data=data, record_path='results').head(n=5)

Unnamed: 0,status,city,state,country,classification,product_type,event_id,recalling_firm,address_1,address_2,...,recall_number,product_description,product_quantity,reason_for_recall,recall_initiation_date,center_classification_date,report_date,code_info,more_code_info,termination_date
0,Ongoing,Seattle,WA,United States,Class I,Food,92830,David Oppenheimer & Company I LLC,180 Nickerson St Ste 211,,...,F-1495-2023,"Zespri Organic Green Kiwi, product of New Zeal...",1 pallet,Listeria monocytogenes,20230808,20230908,20230920,none,,
1,Terminated,Dayton,OH,United States,Class II,Food,92888,Cassanos Inc,1700 E Stroop Rd,,...,F-1510-2023,Cassano's The Pizza King Premium Pizza Dough o...,180,Undeclared Allergen - Soy Flour,20230821,20230913,20230920,"Lot: 193, 194 Expiration: 01/10/24, 01/11/24 I...",,20230915.0
2,Ongoing,Englewood Cliffs,NJ,United States,Class II,Food,92853,Conopco DBA Unilever,700 Sylvan Ave,,...,F-1516-2023,Knorr Letras con Tomate Tomato Based Alphabet ...,"230,000 cases",Undeclared egg,20230810,20230913,20230920,"Best buy dates prior to and including July 6, ...",,
3,Ongoing,Stanwood,WA,United States,Class II,Food,92907,"Twin City Foods, Inc.",10120 269th Pl Nw Ste A,,...,F-1504-2023,"Individually Quick Frozen (IQF) Corn, distribu...","Bulk Cases Total 69,134 cs (Corn 12/1kg bags =...",Listeria monocytogenes,20230818,20230912,20230920,BULK CASES have the following codes and USE BY...,,
4,Ongoing,Stanwood,WA,United States,Class II,Food,92907,"Twin City Foods, Inc.",10120 269th Pl Nw Ste A,,...,F-1505-2023,"Kernel Corn, BEST PRICE TOPVALU, net wt. per b...","TopValu 300g = 1,108 cs; Signature Select = 1,...",Listeria monocytogenes,20230818,20230912,20230920,BEST PRICE TOPVALU: 2024.02.23 Signature Sele...,,


**Note:**

We will cover these two modules only at a later time. In the meantime, you may wish to look at the specific chapters listed below.

Streamlined Data Ingestion with pandas
- Chapter 4: Importing JSON Data and Working with APIs

Intermediate Importing Data in Python
- Chapter 2: Interacting with APIs to import data from the web