## CHALLENGE 1

Reading your hierarchical data files

*Note:
You can follow the Data-Downloader Guide to download any data files you need for this challenge.*

In [1]:
#Libraries used

import numpy as np
import h5py

### Reading your hdf file from NASA Earthdata

In [2]:
# h5py.File() either opens an existing HDF5 file or creates a new one,
# depending on the mode it is given. Here the file is opened for reading only.

viirs = h5py.File(
    'VNP46A2.A2022252.h00v06.001.2022260143526.h5',
    mode="r",
)

### Valid modes of this function are:

'r' - For reading only, the file must exist (it is also the default).

'r+' - For reading and writing, the file must exist.

'w' - Used to create a file or truncate if it exists.

'w-' or 'x' - For creating a file, the command fails if the file already exists.

'a' - For reading and writing if it already exists, creates a file otherwise.

#### Now try opening your file below 

In [3]:
#f=h5py.File('filename','mode')

#

### Getting items in the base directory

In [4]:
# Reading groups: gather every (name, object) pair stored at the root of the file
base_items = [(name, member) for name, member in viirs.items()]
print('Items in the base directory:', base_items)

Items in the base directory: [('HDFEOS', <HDF5 group "/HDFEOS" (2 members)>), ('HDFEOS INFORMATION', <HDF5 group "/HDFEOS INFORMATION" (1 members)>)]


### Getting items from the subgroups

In [5]:
# Reading subgroups: fetch the 'HDFEOS' group from the root and list its members.
# The 'HDFEOS INFORMATION' group can be inspected in exactly the same way.
g1 = viirs.get('HDFEOS')
g1_items = [*g1.items()]
print('Items in HDFEOS', g1_items)

Items in HDFEOS [('ADDITIONAL', <HDF5 group "/HDFEOS/ADDITIONAL" (1 members)>), ('GRIDS', <HDF5 group "/HDFEOS/GRIDS" (1 members)>)]


In [6]:
# Opening a subgroup
# g1 holds the HDFEOS group, so its children are looked up relative to g1.

g3 = g1.get('ADDITIONAL')
g3_items = [(name, member) for name, member in g3.items()]
print('Items in subgroup ADDITIONAL', g3_items)

Items in subgroup ADDITIONAL [('FILE_ATTRIBUTES', <HDF5 group "/HDFEOS/ADDITIONAL/FILE_ATTRIBUTES" (0 members)>)]


In [7]:
# 'GRIDS' is the other member of the HDFEOS group (g1); open it the same way
g4 = g1.get('GRIDS')
g4_items = [*g4.items()]
print('Items in subgroup GRIDS', g4_items)

Items in subgroup GRIDS [('VNP_Grid_DNB', <HDF5 group "/HDFEOS/GRIDS/VNP_Grid_DNB" (1 members)>)]


In [8]:
# A subgroup can also be reached in one step by giving its full path from the root
g5 = viirs.get('/HDFEOS/GRIDS/VNP_Grid_DNB/Data Fields')
# g5 is the 'Data Fields' group inside VNP_Grid_DNB, so the members listed
# below are the contents of 'Data Fields'
g5_items = [(name, member) for name, member in g5.items()]
print('Items in VNP_Grid_DNB', g5_items)

Items in VNP_Grid_DNB [('DNB_BRDF-Corrected_NTL', <HDF5 dataset "DNB_BRDF-Corrected_NTL": shape (2400, 2400), type "<u2">), ('DNB_Lunar_Irradiance', <HDF5 dataset "DNB_Lunar_Irradiance": shape (2400, 2400), type "<u2">), ('Gap_Filled_DNB_BRDF-Corrected_NTL', <HDF5 dataset "Gap_Filled_DNB_BRDF-Corrected_NTL": shape (2400, 2400), type "<u2">), ('Latest_High_Quality_Retrieval', <HDF5 dataset "Latest_High_Quality_Retrieval": shape (2400, 2400), type "|u1">), ('Mandatory_Quality_Flag', <HDF5 dataset "Mandatory_Quality_Flag": shape (2400, 2400), type "|u1">), ('QF_Cloud_Mask', <HDF5 dataset "QF_Cloud_Mask": shape (2400, 2400), type "<u2">), ('Snow_Flag', <HDF5 dataset "Snow_Flag": shape (2400, 2400), type "|u1">)]


### Opening a dataset

In [9]:
# Handle to the 'Data Fields' group, addressed by its absolute path in the file
g5 = viirs.get(
    '/HDFEOS/GRIDS/VNP_Grid_DNB/Data Fields',
    default=None,
)

In [10]:
# Collect the names of all members found under /HDFEOS/GRIDS/VNP_Grid_DNB/Data Fields

ls = [member_name for member_name in g5.keys()]
print('List of datasets in Data Fields: \n', ls)


List of datasets in Data Fields: 
 ['DNB_BRDF-Corrected_NTL', 'DNB_Lunar_Irradiance', 'Gap_Filled_DNB_BRDF-Corrected_NTL', 'Latest_High_Quality_Retrieval', 'Mandatory_Quality_Flag', 'QF_Cloud_Mask', 'Snow_Flag']


In [11]:
# Open a dataset and convert it into a numpy array.
# Once it is in numpy array format you can perform array functions on it.
#
# The root of this file only contains the 'HDFEOS' and 'HDFEOS INFORMATION'
# groups, so looking up 'StructMetadata.0' directly on the file returns None,
# and np.array(None) silently becomes a 0-d object array. The dataset has to be
# addressed through its parent group instead.
# NOTE(review): assumes the standard HDF-EOS5 layout, where 'StructMetadata.0'
# is the member of 'HDFEOS INFORMATION' - confirm with
# list(viirs['HDFEOS INFORMATION'].keys()).

data = viirs.get('/HDFEOS INFORMATION/StructMetadata.0')
if data is None:
    # .get() returns None for a missing path instead of raising; fail loudly
    # so that the array below is never built from None.
    raise KeyError("'/HDFEOS INFORMATION/StructMetadata.0' was not found in the file")
dataset1 = np.array(data)

In [12]:
# Read the 'DNB_BRDF-Corrected_NTL' dataset from the Data Fields group into a numpy array
data1 = np.array(g5.get('DNB_BRDF-Corrected_NTL'))

# Note: the shape reported here belongs to dataset1 (defined earlier), not to data1
print('Shape of dataset1: \n', np.shape(dataset1))
print(data1)

Shape of dataset1: 
 ()
[[65535 65535 65535 ... 65535 65535 65535]
 [65535 65535 65535 ... 65535 65535 65535]
 [65535 65535 65535 ... 65535 65535 65535]
 ...
 [65535 65535 65535 ... 65535 65535 65535]
 [65535 65535 65535 ... 65535 65535 65535]
 [65535 65535 65535 ... 65535 65535 65535]]


### Read attributes of the dataset

In [13]:
# Fetch the dataset as an h5py object (not a numpy array) so that its
# attributes can be read through .attrs
data1 = g5.get('DNB_BRDF-Corrected_NTL')

k = [attr_name for attr_name in data1.attrs]        # attribute names
v = [data1.attrs[attr_name] for attr_name in k]     # attribute values, in the same order as k

# The first value should correspond to the first key
print(k[0], v[0], sep='\n')

# Looking the first key up directly gives the same value as v[0]
print(data1.attrs[k[0]])

# Key and value found at position 4
print(k[4], v[4])

_FillValue
[65535]
[65535]
units b'nWatts/(cm^2 sr) \n'


# Try it yourself!

#### 1. Open your h5 file 

#### 2. Read the base directory items into a list and print 

#### 3. Open groups 

#### 4. Open subgroups 

#### 5. Open subgroups using the path

#### 6. Open a dataset and convert into numpy array

#### 7. Get list of datasets in a location

#### 8. Create lists of the attribute keys and values of a dataset

### *Explore similar functions for your other data files!*