In [1]:
import pandas as pd
import glob
import json

## Read Data

In [2]:
# Get JSON files list from the metadata folder
path = "/Users/xavierevans/nasa/metadata"
json_files = glob.glob(path + "/*.json")

In [3]:
print("This is what some of the file paths look like.")
json_files[:5]

This is what some of the file paths look like.


['/Users/xavierevans/nasa/metadata/MA_HIRS2_TIROSN_OMA_001.json',
 '/Users/xavierevans/nasa/metadata/SNPPCrISL1B_2.json',
 '/Users/xavierevans/nasa/metadata/GPM_2AGPROFF13SSMI_CLIM_05.json',
 '/Users/xavierevans/nasa/metadata/S5P_L1B_RA_BD7_HiR_1.json',
 '/Users/xavierevans/nasa/metadata/OMI_MINDS_NO2G_1.1.json']

### Get Unique Keys

In [4]:
json_keys = []

In [5]:
for file in json_files:
    with open(file) as f:
        data = json.load(f)
        json_keys += data.keys()

In [6]:
json_keys = list(set(json_keys))

### Get Values

In [7]:
json_values = []

In [8]:
for file in json_files:
    with open(file) as f:
        data = json.load(f)
        json_values.append([data[key] if key in data.keys() else None for key in json_keys])

### Create DataFrame

In [9]:
df = pd.DataFrame(json_values, columns=json_keys)

In [10]:
print(f"The shape of the DataFrame is {df.shape}. There are {df.shape[0]} datasets and {df.shape[1]} attributes for each.")

The shape of the DataFrame is (1738, 34). There are 1738 datasets and 34 attributes for each.


In [11]:
df

Unnamed: 0,MetadataDates,Projects,DirectDistributionInformation,AccessConstraints,RelatedUrls,Abstract,Version,MetadataAssociations,UseConstraints,Purpose,...,DataDates,ProcessingLevel,ShortName,DataLanguage,ContactGroups,Platforms,DOI,DirectoryNames,EntryTitle,Quality
0,"[{'Date': '2007-06-14T00:00:00.000Z', 'Type': ...",[{'ShortName': 'MERRA TIME-MEAN OBSERVATION DA...,,,[{'Description': 'MA_HIRS2_TIROSN_OMA variable...,The differences between the observations and t...,001,,{'Description': 'The Earth Observing System Da...,,...,"[{'Date': '2007-06-14T00:00:00.000Z', 'Type': ...",{'Id': '4'},MA_HIRS2_TIROSN_OMA,English,"[{'Roles': ['Technical Contact'], 'GroupName':...","[{'Type': 'Models/Analyses', 'ShortName': 'MER...","{'MissingReason': 'Not Applicable', 'Explanati...",,Gridded Monthly Time-Mean Observation minus An...,
1,"[{'Date': '2016-12-09T00:00:00.000Z', 'Type': ...","[{'ShortName': 'SUOMI-NPP', 'LongName': 'Suomi...",,{'Description': 'None'},[{'URL': 'https://docserver.gesdisc.eosdis.nas...,The Cross-track Infrared Sounder (CrIS) Level ...,2,,{'Description': 'The Earth Observing System Da...,The radiance data are used for sounding observ...,...,"[{'Date': '1970-01-01T00:00:00.000Z', 'Type': ...",{'Id': '1B'},SNPPCrISL1B,English,,"[{'Type': 'Earth Observation Satellites', 'Sho...",{'DOI': '10.5067/9NPOTPIPLMAW'},,Suomi NPP CrIS Level 1B Full Spectral Resoluti...,
2,"[{'Date': '2018-02-05T00:00:00.000Z', 'Type': ...","[{'ShortName': 'GPM', 'LongName': 'Global Prec...",,{'Description': 'None'},[{'Description': 'GPM SSM/I on F13 (GPROF) Cli...,Version 5 is the current version of the data s...,05,,{'Description': 'The Earth Observing System Da...,,...,"[{'Date': '2018-02-06T00:00:00.000Z', 'Type': ...",{'Id': '2'},GPM_2AGPROFF13SSMI_CLIM,English,,"[{'Type': 'Earth Observation Satellites', 'Sho...",{'DOI': '10.5067/GPM/SSMI/F13/GPROFCLIM/2A/05'},,GPM SSM/I on F13 (GPROF) Climate-based Radiome...,The primary limitation of the 3GPROF product i...
3,"[{'Date': '2017-09-08T00:00:00.000Z', 'Type': ...","[{'ShortName': 'Sentinel-5P', 'LongName': 'Cop...",,{'Description': 'EULA'},[{'URL': 'https://docserver.gesdisc.eosdis.nas...,"Starting from August 6th in 2019, Sentinel-5P ...",1,,{'Description': 'The Earth Observing System Da...,,...,"[{'Date': '2014-12-09T00:00:00.000Z', 'Type': ...",{'Id': '1B'},S5P_L1B_RA_BD7_HiR,English,,"[{'Type': 'Earth Observation Satellites', 'Sho...",{'DOI': '10.5067/SENTINEL5P/S5P_L1B_RA_BD7_HiR...,,Sentinel-5P TROPOMI Radiance product band 7 (S...,Scientific research
4,"[{'Date': '2021-11-09T00:00:00.000Z', 'Type': ...","[{'ShortName': 'MEaSUREs', 'LongName': 'Making...",,{'Description': 'None'},[{'URL': 'https://docserver.gesdisc.eosdis.nas...,As part of the NASA's Making Earth System Data...,1.1,"[{'EntryId': 'OMI_MINDS_NO2G', 'Version': '1.1...",{'Description': 'The Earth Observing System Da...,,...,"[{'Date': '2021-11-09T00:00:00.000Z', 'Type': ...",{'Id': '2G'},OMI_MINDS_NO2G,English,,"[{'Type': 'Earth Observation Satellites', 'Sho...",{'DOI': '10.5067/MEASURES/MINDS/DATA214'},,"OMI/Aura NO2 Tropospheric, Stratospheric & Tot...",Validation of the OMI NO2 retrievals with inde...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1733,"[{'Date': '2017-05-12T00:00:00.000Z', 'Type': ...","[{'ShortName': 'GPM', 'LongName': 'Global Prec...",,{'Description': 'None'},[{'Description': 'Surface Precipitation from G...,Version 5 is the current version of the data s...,05,,{'Description': 'The Earth Observing System Da...,,...,"[{'Date': '2017-05-01T00:00:00.000Z', 'Type': ...",{'Id': '2'},GPM_2AGPROFMETOPAMHS_CLIM,English,,"[{'Type': 'Earth Observation Satellites', 'Sho...",{'DOI': '10.5067/GPM/MHS/METOPA/GPROFCLIM/2A/05'},,GPM MHS on METOP-A (GPROF) Radiometer Precipit...,The major sources of systematic errors in thes...
1734,"[{'Date': '2007-06-14T00:00:00.000Z', 'Type': ...","[{'ShortName': 'MERRA', 'LongName': 'Modern Er...",,,"[{'Description': 'MATMCPRAD variable', 'URL': ...",The MATMCPRAD or tavgM_3d_rad_Cp data product ...,5.2.0,,{'Description': 'The Earth Observing System Da...,,...,"[{'Date': '2007-06-14T00:00:00.000Z', 'Type': ...",{'Id': '4'},MATMCPRAD,English,"[{'Roles': ['Technical Contact'], 'GroupName':...","[{'Type': 'Models/Analyses', 'ShortName': 'MER...",{'DOI': '10.5067/7SCF81BU67P5'},,"tavgM_3d_rad_Cp: MERRA 3D IAU Diagnostic, Radi...",
1735,"[{'Date': '2007-01-10T00:00:00.000Z', 'Type': ...","[{'ShortName': 'UARS', 'LongName': 'Upper Atmo...",,,[{'URL': 'https://docserver.gesdisc.eosdis.nas...,The UARS Particle Environment Monitor (PEM) le...,001,,{'Description': 'The Earth Observing System Da...,,...,"[{'Date': '2006-04-20T00:00:00.000Z', 'Type': ...",{'Id': '2'},UARPE2AXIS1,English,,"[{'Type': 'Earth Observation Satellites', 'Sho...","{'MissingReason': 'Not Applicable', 'Explanati...",,UARS PEM Level 2 AXIS 1 V001 (UARPE2AXIS1) at ...,
1736,"[{'Date': '2011-07-15T00:00:00.000Z', 'Type': ...","[{'ShortName': 'TRMM', 'LongName': 'Tropical R...",,{'Description': 'None'},[{'URL': 'https://disc2.gesdisc.eosdis.nasa.go...,This dataset is part of the University of Wash...,7,,{'Description': 'The Earth Observing System Da...,,...,"[{'Date': '2011-07-01T00:00:00.000Z', 'Type': ...",{'Id': '2'},TRMM_2A53UW,English,,"[{'Type': 'In Situ Land-based Platforms', 'Sho...","{'MissingReason': 'Not Applicable', 'Explanati...",,TRMM Ground Validation Radar Site Rain Rate Ma...,


In [12]:
entry_titles = df['EntryTitle'].tolist()
entry_titles[:5]

['Gridded Monthly Time-Mean Observation minus Analysis (oma) Values 0.5 x 0.667 degree V001 (MA_HIRS2_TIROSN_OMA) at GES DISC',
 'Suomi NPP CrIS Level 1B Full Spectral Resolution V2 (SNPPCrISL1B) at GES DISC',
 'GPM SSM/I on F13 (GPROF) Climate-based Radiometer Precipitation Profiling L2 1.5 hours 12 km V05 (GPM_2AGPROFF13SSMI_CLIM) at GES DISC',
 'Sentinel-5P TROPOMI Radiance product band 7 (SWIR detector) L1B 5.5km x 7km V1 (S5P_L1B_RA_BD7_HiR) at GES DISC',
 'OMI/Aura NO2 Tropospheric, Stratospheric & Total Columns MINDS Daily L2 Global Gridded 0.25 degree x 0.25 degree V1.1 (OMI_MINDS_NO2G) at GES DISC']