In [1]:
"""
Reference: https://www.drivendata.co/blog/predict-pm25-benchmark/

pyhdf appears to be more powerful than gdal, so it may be worth adopting some of the 
methods used here for working with hdf files.

Additionally, the tutorial shows how to make a masked numpy array, which allows us to work
with sparse arrays? (I'm not sure how this works yet.)

Finally, the tutorial explains how to align AOD data with coordinates. This could let us
make some useful model features, like local weather conditions, etc.

"""

import os
import pandas as pd
from datetime import datetime
from osgeo import gdal
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
import keras.backend as backend
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from dateutil import parser
import matplotlib.pyplot as plt
from pyhdf.SD import SD, SDC, SDS
import pyproj
from pyproj import CRS, Proj
from typing import Union
from shapely.geometry import Point, Polygon
import geopandas as gpd
from datetime import datetime, timedelta
import pickle
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import KFold
import tensorflow as tf
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score



# from pathlib import Path
# import random
# from typing import Dict, List, Union

# from cloudpathlib import S3Path
import geopandas as gpd
from netCDF4 import Dataset
# import rasterio

# DATA_PATH = Path.cwd().parent / "data"
# RAW = DATA_PATH / "raw"
# INTERIM = DATA_PATH / "interim"

In [2]:
"""
DATA PROCESSING

"""

'\nDATA PROCESSING\n\n'

In [3]:
# Loop over orbits to apply the attributes
def calibrate_data(dataset: SDS, shape: list[int], calibration_dict: dict):
    """Given a MAIAC dataset and calibration parameters, return a masked
    array of calibrated data.
    
    Args:
        dataset (SDS): dataset in SDS format (e.g. blue band AOD).
        shape (List[int]): dataset shape as a list of [orbits, height, width].
        calibration_dict (Dict): dictionary containing, at a minimum,
            `valid_range` (list or tuple), `_FillValue` (int or float),
            `add_offset` (float), and `scale_factor` (float).
    
    Returns:
        corrected_AOD (np.ma.MaskedArray): masked array of calibrated data
            with a fill value of nan.
    """
    corrected_AOD = np.ma.empty(shape, dtype=np.double)
    for orbit in range(shape[0]):
        data = dataset[orbit, :, :].astype(np.double)
        invalid_condition = (
            (data < calibration_dict["valid_range"][0]) |
            (data > calibration_dict["valid_range"][1]) |
            (data == calibration_dict["_FillValue"])
        )
        data[invalid_condition] = np.nan
        data = (
            (data - calibration_dict["add_offset"]) *
            calibration_dict["scale_factor"]
        )
        data = np.ma.masked_array(data, np.isnan(data))
        corrected_AOD[orbit, : :] = data
    corrected_AOD.fill_value = np.nan
    return corrected_AOD



In [4]:
"""

Aligning AOD data with real world coordinates


"""


def create_meshgrid(alignment_dict: dict, shape: list[int]):
    """Given an image shape, create a meshgrid of points
    between bounding coordinates.
    
    Args:
        alignment_dict (Dict): dictionary containing, at a minimum,
            `upper_left` (tuple), `lower_right` (tuple), `crs` (str),
            and `crs_params` (tuple).
        shape (List[int]): dataset shape as a list of
            [orbits, height, width].
    
    Returns:
        xv (np.array): x (longitude) coordinates.
        yv (np.array): y (latitude) coordinates.
    """
    # Determine grid bounds using two coordinates
    x0, y0 = alignment_dict["upper_left"]
    x1, y1 = alignment_dict["lower_right"]
    
    # Interpolate points between corners, inclusive of bounds
    x = np.linspace(x0, x1, shape[2], endpoint=True)
    y = np.linspace(y0, y1, shape[1], endpoint=True)
    
    # Return two 2D arrays representing X & Y coordinates of all points
    xv, yv = np.meshgrid(x, y)
    return xv, yv

In [5]:


# Source: https://spatialreference.org/ref/sr-org/modis-sinusoidal/proj4js/

def transform_arrays(
    xv: Union[np.array, float],
    yv: Union[np.array, float],
    crs_from: CRS,
    crs_to: CRS
):
    """Transform points or arrays from one CRS to another CRS.
    
    Args:
        xv (np.array or float): x (longitude) coordinates or value.
        yv (np.array or float): y (latitude) coordinates or value.
        crs_from (CRS): source coordinate reference system.
        crs_to (CRS): destination coordinate reference system.
    
    Returns:
        lon, lat (tuple): x coordinate(s), y coordinate(s)
    """
    transformer = pyproj.Transformer.from_crs(
        crs_from,
        crs_to,
        always_xy=True,
    )
    
    lon, lat = transformer.transform(xv, yv)
    return lon, lat



# Project sinu grid onto wgs84 grid

In [6]:
#Currently not used
def convert_array_to_df(
    corrected_arr: np.ma.MaskedArray,
    lat:np.ndarray,
    lon: np.ndarray,
    granule_id: str,
    crs: CRS,
    total_bounds: np.ndarray = None
):
    """Align data values with latitude and longitude coordinates
    and return a GeoDataFrame.
    
    Args:
        corrected_arr (np.ma.MaskedArray): data values for each pixel.
        lat (np.ndarray): latitude for each pixel.
        lon (np.ndarray): longitude for each pixel.
        granule_id (str): granule name.
        crs (CRS): coordinate reference system
        total_bounds (np.ndarray, optional): If provided,
            will filter out points that fall outside of these bounds.
            Composed of xmin, ymin, xmax, ymax.
    """
    lats = lat.ravel()
    lons = lon.ravel()
    n_orbits = len(corrected_arr)
    size = lats.size
    values = {
        "value": np.concatenate([d.data.ravel() for d in corrected_arr]),
        "lat": np.tile(lats, n_orbits),
        "lon": np.tile(lons, n_orbits),
        "orbit": np.arange(n_orbits).repeat(size),
        "granule_id": [granule_id] * size * n_orbits
        
    }
    
    df = pd.DataFrame(values).dropna()
    if total_bounds is not None:
        x_min, y_min, x_max, y_max = total_bounds
        df = df[df.lon.between(x_min, x_max) & df.lat.between(y_min, y_max)]
    
    gdf = gpd.GeoDataFrame(df)
    gdf["geometry"] = gpd.points_from_xy(gdf.lon, gdf.lat)
    gdf.crs = crs
    return gdf[["granule_id", "orbit", "geometry", "value"]].reset_index(drop=True)

In [7]:
"""

Some more helpful functions from the tutorial


"""

def create_calibration_dict(data: SDS):
    """Define calibration dictionary given a SDS dataset,
    which contains:
        - name
        - scale factor
        - offset
        - unit
        - fill value
        - valid range
    
    Args:
        data (SDS): dataset in the SDS format.
    
    Returns:
        calibration_dict (Dict): dict of calibration parameters.
    """
    return data.attributes()


def create_alignment_dict(hdf: SD):
    """Define alignment dictionary given a SD data file, 
    which contains:
        - upper left coordinates
        - lower right coordinates
        - coordinate reference system (CRS)
        - CRS parameters
    
    Args:
        hdf (SD): hdf data object
    
    Returns:
        alignment_dict (Dict): dict of alignment parameters.
    """
    group_1 = hdf.attributes()["StructMetadata.0"].split("END_GROUP=GRID_1")[0]
    hdf_metadata = dict([x.split("=") for x in group_1.split() if "=" in x])
    alignment_dict = {
        "upper_left": eval(hdf_metadata["UpperLeftPointMtrs"]),
        "lower_right": eval(hdf_metadata["LowerRightMtrs"]),
        "crs": hdf_metadata["Projection"],
        "crs_params": eval(hdf_metadata["ProjParams"])
    }
    
    return alignment_dict

In [28]:
from shapely.geometry import Point, Polygon

"""

Everything here is original code that uses the functions from the tutorial.

within(): taken from https://automating-gis-processes.github.io/2017/lessons/L3/point-in-polygon.html

Make_Submatrix(): A function which takes raw AOD matrix and a Grid ID of interest as input and outputs a submatrix 
of AOD values which are inside this grid point (5km by 5km).

Currently, Make_Submatrix() only returns the number of pixels in the AOD matrix are within the location determined
by Grid ID.

The rest of the code in this cell runs extremely slowly, but this is because we are running it for all possible 
combinations of HDF file and Grid ID. When we actually use these functions to run a model on a given Grid ID and 
datetime, we will first filter the set of HDF files such that we only search through HDF files with matching city 
and matching datetime.

"""


#Helper function
def Make_Poly(polyString):
    poly_coords = []
    for string in polyString.split(','):
        split_string = string.split(' ')
        if split_string[0] == 'POLYGON':
            split_string = split_string[1:]
            split_string[0] = str(split_string[0])[2:]
    #         print(tuple(float(x) for x in split_string))
        elif split_string[0] == '':
            split_string = split_string[1:]
        if split_string[1][-2] == ')':
            split_string[1] = split_string[1][0:-2]
        poly_coords.append(tuple(float(x) for x in split_string))

    return Polygon(poly_coords)



#Main function
def Make_Submatrix(corrected_AOD, lon, lat, alignment_dict, grid_md, gridID, is_hdf):
    
    poly = Make_Poly(grid_md['wkt'][gridID])
    if is_hdf:        
        return_list = []
        for band in range(len(corrected_AOD)):
            counter = 0
            triples_array = []
            for i in range(len(corrected_AOD[0])):
                if lat[i,0] > poly.bounds[3]:
                    continue

                if lat[i,0] < poly.bounds[1]:
                    continue

                for j in range(len(corrected_AOD[0][0])):
                    if lon[i,j] > poly.bounds[2]:
                        continue
                    p1 = Point(lon[i,j], lat[i,j]) 
                    if(p1.within(poly)):
                        triples_array.append((i, j, corrected_AOD[band,i,j]))


            if len(triples_array) == 0:
                continue

            temp_array = np.zeros((10,10))
            temp_array = np.ma.masked_array(temp_array, mask=np.ones((10,10)))

            i_array = [x[0] for x in triples_array]
            j_array = [x[1] for x in triples_array]
            min_i = min(i_array)
            min_j = min(j_array)



            for triple in triples_array:
                if triple[2] is np.ma.masked:
                    pass

                else:
                    temp_array[triple[0]-min_i, triple[1]-min_j] = triple[2]
            return_list.append(temp_array)
        
    #this case runs when we are working with a .nc file    
    else:
        
    #Since the lon, lat arrays are masked, we will use the fact that AOD arrays from the .nc file have a constant
    #pattern (the valid regions form a curve going from the top right to the bottom left) to narrow down the 
    #search
        return_list = []
        triples_array = []
        for i in range(len(corrected_AOD)):
            min_unmasked_index = np.ma.flatnotmasked_edges(lat[i])[0]
            max_unmasked_index = np.ma.flatnotmasked_edges(lat[i])[1]
            if lat[i][min_unmasked_index] < poly.bounds[1]:
                continue
            if lat[i][max_unmasked_index] > poly.bounds[3]:
                continue
            for j in range(len(corrected_AOD[0])):
                
                #need to check if our longitude, latitude values are masked
                if lon[i,j] is np.ma.masked:
                    pass
                
                p1 = Point(lon[i,j], lat[i,j]) 
                if(p1.within(poly)):
                    triples_array.append((i, j, corrected_AOD[i,j]))
                    
                    
            if len(triples_array) == 0:
                continue

            temp_array = np.zeros((10,10))
            temp_array = np.ma.masked_array(temp_array, mask=np.ones((10,10)))

            i_array = [x[0] for x in triples_array]
            j_array = [x[1] for x in triples_array]
            min_i = min(i_array)
            min_j = min(j_array)



            for triple in triples_array:
                if triple[2] is np.ma.masked:
                    pass

                else:
                    temp_array[triple[0]-min_i, triple[1]-min_j] = triple[2]
            return_list.append(temp_array)
        print(temp_array)
                
            
        
        
        
        
        
    return return_list 
    


In [9]:
#Note that each AOD_array in array_of_AOD_arrays should be a precalculated subarray corresponding to grid id
#Currently not using the is_hdf parameter but will probably have to as we make this more sophisticated
def collect_features(array_of_AOD_arrays, area_per_subarray, is_hdf):
    

    total_values = len(array_of_AOD_arrays)*area_per_subarray

    all_values = np.zeros((total_values))
    all_values = np.ma.masked_array(all_values, mask=np.ones((total_values)))


    counter = 0
    for AOD_array in array_of_AOD_arrays:
        for row in AOD_array:
            for value in row:
                if not (value is np.ma.masked):
                    all_values[counter] = value
                    counter+=1


    mean = np.ma.mean(all_values)
    minimum = np.ma.min(all_values)
    maximum = np.ma.max(all_values)
    std = np.ma.std(all_values)
    summ = np.ma.sum(all_values)


    
    
    
    return(mean, minimum, maximum, std, summ)

In [38]:
import random

def get_features(train_labels, satellite_metadata, grid_id_list, training):
    features = []
    

    r = list(range(len(train_labels)))
    random.shuffle(r)
    for i in r:
        
        print(i)
#         print(features)

        satellite_metadata_cut = satellite_metadata

        grid_id = train_labels['grid_id'][i]
        j = grid_id_list.index(grid_id)
        location = grid_metadata['location'][j]
        tz = grid_metadata['tz'][j]
        datetime = pd.to_datetime(
            train_labels['datetime'][j],
            format="%Y%m%dT%H:%M:%S",
            utc=True
        )
        polygon = grid_metadata['wkt'][j]

        if location == 'Delhi':
            satellite_metadata_cut = satellite_metadata[satellite_metadata['location'] == 'dl'].copy()
        elif location == 'Los Angeles (SoCAB)':
            satellite_metadata_cut = satellite_metadata[satellite_metadata['location'] == 'la'].copy()
        elif location == 'Taipei':
            satellite_metadata_cut = satellite_metadata[satellite_metadata['location'] == 'tpe'].copy()


        valid_datetime = [None]*len(satellite_metadata_cut)
        satellite_metadata_cut.reset_index(drop=True, inplace=True) # ensure indexes pair with number of rows
    #     count = 0
        for index, row in satellite_metadata_cut.iterrows():
            


            datetime1 = pd.to_datetime((row['time_start']), format="%Y%m%dT%H:%M:%S", 
                                    utc=True) 
            datetime2 = pd.to_datetime(row['time_end'], format="%Y%m%dT%H:%M:%S", 
                                                utc=True) 


            truth1 = (datetime <= datetime2)
            truth2 = datetime2 <= datetime + timedelta(hours=24)
 
            valid_datetime[index] = (truth1 & truth2)



        satellite_metadata_cut['valid_datetime'] = valid_datetime
        satellite_metadata_cut = satellite_metadata_cut[satellite_metadata_cut['valid_datetime'] == True]

        raw_hdf_nc_set = list(satellite_metadata_cut['granule_id']) 


        print(raw_hdf_nc_set, location)
#         print('next:')

        list_of_all_AOD_arrays = []
        for hdf_nc_filename in raw_hdf_nc_set:
            
#             print('now working with:', hdf_nc_filename)
            
            if training:
                filepath = 'train/' + hdf_nc_filename
            else:
                filepath = 'test/' + hdf_nc_filename

#             print('here:', hdf_nc_filename)
            if hdf_nc_filename.endswith('f'):
                
#                 print('this should happen')
            
                raw_hdf = SD(filepath)

                alignment_dict = create_alignment_dict(raw_hdf)

                blue_band_AOD = raw_hdf.select("Optical_Depth_047")
                name, num_dim, shape, types, num_attr = blue_band_AOD.info()
                calibration_dict = create_calibration_dict(blue_band_AOD)
                corrected_AOD = calibrate_data(blue_band_AOD, shape, calibration_dict)

                xv, yv = create_meshgrid(alignment_dict, shape)            
                sinu_crs = Proj(f"+proj=sinu +R={alignment_dict['crs_params'][0]} +nadgrids=@null +wktext").crs
                wgs84_crs = CRS.from_epsg("4326")            
                lon, lat = transform_arrays(xv, yv, sinu_crs, wgs84_crs)

                temp = Make_Submatrix(corrected_AOD, lon, lat, alignment_dict, grid_metadata, grid_id, True)
                if len(temp) == 0:
                    continue
                for AOD_array in temp:
                    list_of_all_AOD_arrays.append(AOD_array)
                    
            else:
                fh = Dataset(filepath, mode='r')
                corrected_AOD = (fh.groups['4.4_KM_PRODUCTS']['AUXILIARY']['Aerosol_Optical_Depth_Raw'])
                lat = (fh['4.4_KM_PRODUCTS']['Latitude'])
                lon = (fh['4.4_KM_PRODUCTS']['Longitude'])
                alignment_dict = None
                temp = Make_Submatrix(corrected_AOD, lon, lat, alignment_dict, grid_metadata, grid_id, False)
                fh.close()
                if len(temp) == 0:
                    continue
                for AOD_array in temp:
                    list_of_all_AOD_arrays.append(AOD_array)
                
                    
                
                
                

        if list_of_all_AOD_arrays == 0:
            if training:
                features.append(np.array((np.nan, np.nan, np.nan, np.nan, np.nan)))
            else:
                features.append(np.array((0, 0, 0, 0, 0)))
        else:
#             print(list_of_all_AOD_arrays)
            features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))
#             print(features)

    print(features)
    return features

In [37]:
#Getting data to train the model
from datetime import datetime, timedelta


# features = []
train_labels = pd.read_csv("train_labels.csv") # Smallest subset
grid_metadata = pd.read_csv("grid_metadata.csv", index_col=0)
satellite_metadata = pd.read_csv("pm25_satellite_metadata.csv")
# satellite_metadata = satellite_metadata[satellite_metadata.granule_id.str.endswith('f')] #We now want .nc files too
satellite_metadata = satellite_metadata[satellite_metadata['split'] == 'train'].copy()


# print(grid_metadata['tz'].keys())
grid_id_list = list(grid_metadata['tz'].keys())
features = get_features(train_labels, satellite_metadata, grid_id_list, True)
    

12332
11534
4775
15707
4907
5581


  dx = c_double(coords[0])
  dy = c_double(coords[1])


[[0.02452925406396389 -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]]
13377


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


28854
2724
31278


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


33812
18129
16408
16603
26285


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


23837


  dx = c_double(coords[0])
  dy = c_double(coords[1])


[[0.2170814424753189 -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]]
23583


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


21370
15932
27044
14206
17627
1931
18145


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


283
30523
25205


  dx = c_double(coords[0])
  dy = c_double(coords[1])


[[0.02452925406396389 -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]]
4708


  dx = c_double(coords[0])
  dy = c_double(coords[1])


[[0.25055235624313354 -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]]
8220


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


16368


  dx = c_double(coords[0])
  dy = c_double(coords[1])


[[0.22498971223831177 -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]]
8967
30729
14454


  dx = c_double(coords[0])
  dy = c_double(coords[1])


KeyboardInterrupt: 

In [None]:
import pickle
    
pickle.dump( features, open( "save1.p", "wb" ) )
print(features)



In [None]:
#Making the model
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import KFold
import tensorflow as tf



features_cut = features.copy()

        


labels_array = np.array(train_labels.value)
cut_labels_array = list(labels_array[0:227].copy())

i = 0
while( i < len(features_cut)):
    print(features_cut[i][0])
    if np.isnan(features_cut[i][0]):
        features_cut.pop(i)
        cut_labels_array.pop(i)
    else:
        i+=1
        
print(len(features_cut))
print(len(cut_labels_array))

def make_model():

    model = Sequential()
    model.add(Dense(13, input_dim=5, kernel_initializer='normal', activation='relu'))
    model.add(Dense(6, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    # Compile model
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model

model = make_model()
model.summary()
model.fit(X, Y, epochs=1000, batch_size=10)

# estimator = KerasRegressor(build_fn=make_model, nb_epoch=100, batch_size=5, verbose=0)

# X = np.array(features_cut)
# Y = np.array(cut_labels_array)

# kfold = KFold(n_splits=10)
# results = cross_val_score(estimator, X, Y, n_jobs=1)
# print("Results: %.2f (%.2f) MSE" % (results.mean(), results.std()))


In [None]:
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

predicted = model.predict(np.array(features_cut))
print(mean_squared_error(predicted, Y))
r2_score(predicted, Y)


In [None]:
pickle.dump( model, open( "model1.p", "wb" ) )

In [None]:
#testing the model, first getting features

features = []

test_labels = pd.read_csv("submission_format.csv") # Smallest subset
grid_metadata = pd.read_csv("grid_metadata.csv", index_col=0)
satellite_metadata = pd.read_csv("pm25_satellite_metadata.csv")
satellite_metadata = satellite_metadata[satellite_metadata.granule_id.str.endswith('f')]
satellite_metadata = satellite_metadata[satellite_metadata['split'] == 'test'].copy()


grid_id_list = list(grid_metadata['wkt'].keys())

features_test = get_features(test_labels, satellite_metadata, grid_id_list, False) 

1961
['20170108T202500_maiac_la_0.hdf'] Los Angeles (SoCAB)
7044
['20170109T062500_maiac_dl_0.hdf'] Delhi


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


6750
['20170109T062500_maiac_dl_0.hdf'] Delhi


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


3641
['20170110T035000_maiac_tpe_0.hdf', '20170110T021000_maiac_tpe_0.hdf'] Taipei


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


10273
['20170108T054000_maiac_dl_0.hdf'] Delhi


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


11588
['20170108T054000_maiac_dl_0.hdf'] Delhi
13468
['20170107T194500_maiac_la_0.hdf'] Los Angeles (SoCAB)


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


3473
['20170110T035000_maiac_tpe_0.hdf', '20170110T021000_maiac_tpe_0.hdf'] Taipei


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


842
['20170110T201500_maiac_la_0.hdf'] Los Angeles (SoCAB)


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


6849
['20170109T062500_maiac_dl_0.hdf'] Delhi
412
['20170110T201500_maiac_la_0.hdf'] Los Angeles (SoCAB)


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


12801
['20170109T062500_maiac_dl_0.hdf'] Delhi
1848
['20170110T201500_maiac_la_0.hdf'] Los Angeles (SoCAB)


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


9266
['20170110T201500_maiac_la_0.hdf'] Los Angeles (SoCAB)


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


11500
['20170109T193000_maiac_la_0.hdf'] Los Angeles (SoCAB)


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


4219
['20170108T202500_maiac_la_0.hdf'] Los Angeles (SoCAB)
158
['20170111T025500_maiac_tpe_0.hdf', '20170111T025500_maiac_tpe_1.hdf'] Taipei


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


4054
['20170109T062500_maiac_dl_0.hdf'] Delhi
3458
['20170107T194500_maiac_la_0.hdf'] Los Angeles (SoCAB)


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


4886
['20170110T201500_maiac_la_0.hdf'] Los Angeles (SoCAB)


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


11433
['20170109T062500_maiac_dl_0.hdf'] Delhi
13143
['20170110T070500_maiac_dl_0.hdf'] Delhi
2107
['20170109T030500_maiac_tpe_0.hdf', '20170109T030500_maiac_tpe_1.hdf'] Taipei


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


11348
['20170111T025500_maiac_tpe_0.hdf', '20170111T025500_maiac_tpe_1.hdf'] Taipei


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


8190
['20170109T193000_maiac_la_0.hdf'] Los Angeles (SoCAB)


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


8929
['20170108T202500_maiac_la_0.hdf'] Los Angeles (SoCAB)
7643
['20170111T025500_maiac_tpe_0.hdf', '20170111T025500_maiac_tpe_1.hdf'] Taipei


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


2973
['20170108T202500_maiac_la_0.hdf'] Los Angeles (SoCAB)
1888
['20170111T025500_maiac_tpe_0.hdf', '20170111T025500_maiac_tpe_1.hdf'] Taipei


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


11113
['20170110T201500_maiac_la_0.hdf'] Los Angeles (SoCAB)


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


2735
['20170109T193000_maiac_la_0.hdf'] Los Angeles (SoCAB)


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


11661
['20170109T062500_maiac_dl_0.hdf'] Delhi
10371
['20170110T035000_maiac_tpe_0.hdf', '20170110T021000_maiac_tpe_0.hdf'] Taipei


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


794
['20170108T022500_maiac_tpe_0.hdf', '20170108T040000_maiac_tpe_0.hdf'] Taipei


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


12636
['20170110T070500_maiac_dl_0.hdf'] Delhi
8538
['20170111T061000_maiac_dl_0.hdf'] Delhi
12717
['20170111T061000_maiac_dl_0.hdf'] Delhi
10557
['20170109T193000_maiac_la_0.hdf'] Los Angeles (SoCAB)


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


13444
['20170107T194500_maiac_la_0.hdf'] Los Angeles (SoCAB)


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


12434
['20170110T070500_maiac_dl_0.hdf'] Delhi
11835
['20170110T070500_maiac_dl_0.hdf'] Delhi
3820
['20170108T202500_maiac_la_0.hdf'] Los Angeles (SoCAB)
5108
['20170108T202500_maiac_la_0.hdf'] Los Angeles (SoCAB)
965
['20170110T201500_maiac_la_0.hdf'] Los Angeles (SoCAB)


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


4134
['20170109T193000_maiac_la_0.hdf'] Los Angeles (SoCAB)


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


4039
['20170109T193000_maiac_la_0.hdf'] Los Angeles (SoCAB)


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


7450
['20170110T070500_maiac_dl_0.hdf'] Delhi
7059
['20170110T070500_maiac_dl_0.hdf'] Delhi
12310
['20170110T070500_maiac_dl_0.hdf'] Delhi
9709
['20170109T062500_maiac_dl_0.hdf'] Delhi
7698
['20170108T202500_maiac_la_0.hdf'] Los Angeles (SoCAB)
6573
['20170110T201500_maiac_la_0.hdf'] Los Angeles (SoCAB)


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


7346
['20170109T062500_maiac_dl_0.hdf'] Delhi
5168
['20170108T202500_maiac_la_0.hdf'] Los Angeles (SoCAB)
3717
['20170108T054000_maiac_dl_0.hdf'] Delhi
12311
['20170110T070500_maiac_dl_0.hdf'] Delhi
4955
['20170108T054000_maiac_dl_0.hdf'] Delhi
8737
['20170108T202500_maiac_la_0.hdf'] Los Angeles (SoCAB)
9506
['20170111T061000_maiac_dl_0.hdf'] Delhi
1192
['20170108T202500_maiac_la_0.hdf'] Los Angeles (SoCAB)
11215
['20170108T202500_maiac_la_0.hdf'] Los Angeles (SoCAB)
4743
['20170109T193000_maiac_la_0.hdf'] Los Angeles (SoCAB)


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


12294
['20170108T202500_maiac_la_0.hdf'] Los Angeles (SoCAB)
4394
['20170110T070500_maiac_dl_0.hdf'] Delhi
473
['20170111T025500_maiac_tpe_0.hdf', '20170111T025500_maiac_tpe_1.hdf'] Taipei


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


10430
['20170110T201500_maiac_la_0.hdf'] Los Angeles (SoCAB)


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


5114
['20170110T201500_maiac_la_0.hdf'] Los Angeles (SoCAB)


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


11853
['20170110T201500_maiac_la_0.hdf'] Los Angeles (SoCAB)


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


11737
['20170110T070500_maiac_dl_0.hdf'] Delhi
2990
['20170110T201500_maiac_la_0.hdf'] Los Angeles (SoCAB)


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


2220
['20170109T030500_maiac_tpe_0.hdf', '20170109T030500_maiac_tpe_1.hdf'] Taipei


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


1727
['20170109T193000_maiac_la_0.hdf'] Los Angeles (SoCAB)


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


487
['20170110T035000_maiac_tpe_0.hdf', '20170110T021000_maiac_tpe_0.hdf'] Taipei


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


6753
['20170109T062500_maiac_dl_0.hdf'] Delhi
1806
['20170108T202500_maiac_la_0.hdf'] Los Angeles (SoCAB)
3061
['20170109T193000_maiac_la_0.hdf'] Los Angeles (SoCAB)


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


12907
['20170108T202500_maiac_la_0.hdf'] Los Angeles (SoCAB)
260
['20170107T194500_maiac_la_0.hdf'] Los Angeles (SoCAB)


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


11489
['20170111T061000_maiac_dl_0.hdf'] Delhi
6489
['20170110T035000_maiac_tpe_0.hdf', '20170110T021000_maiac_tpe_0.hdf'] Taipei


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


6199
['20170110T070500_maiac_dl_0.hdf'] Delhi
3618
['20170111T025500_maiac_tpe_0.hdf', '20170111T025500_maiac_tpe_1.hdf'] Taipei


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


10846
['20170108T202500_maiac_la_0.hdf'] Los Angeles (SoCAB)
5800
['20170108T054000_maiac_dl_0.hdf'] Delhi
3839
['20170110T070500_maiac_dl_0.hdf'] Delhi
1342
['20170108T022500_maiac_tpe_0.hdf', '20170108T040000_maiac_tpe_0.hdf'] Taipei


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


9112
['20170110T070500_maiac_dl_0.hdf'] Delhi
7793
['20170108T202500_maiac_la_0.hdf'] Los Angeles (SoCAB)
9303
['20170108T202500_maiac_la_0.hdf'] Los Angeles (SoCAB)
3463
['20170109T193000_maiac_la_0.hdf'] Los Angeles (SoCAB)


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


9130
['20170110T201500_maiac_la_0.hdf'] Los Angeles (SoCAB)


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


1894
['20170109T193000_maiac_la_0.hdf'] Los Angeles (SoCAB)


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


6903
['20170109T062500_maiac_dl_0.hdf'] Delhi
9878
['20170109T193000_maiac_la_0.hdf'] Los Angeles (SoCAB)


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


3852
['20170110T201500_maiac_la_0.hdf'] Los Angeles (SoCAB)


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


886
['20170108T202500_maiac_la_0.hdf'] Los Angeles (SoCAB)
11194
['20170110T070500_maiac_dl_0.hdf'] Delhi
6077
['20170108T054000_maiac_dl_0.hdf'] Delhi
1499
['20170111T025500_maiac_tpe_0.hdf', '20170111T025500_maiac_tpe_1.hdf'] Taipei


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


8251
['20170108T054000_maiac_dl_0.hdf'] Delhi
10870
['20170108T202500_maiac_la_0.hdf'] Los Angeles (SoCAB)
9064
['20170110T070500_maiac_dl_0.hdf'] Delhi
8609
['20170108T054000_maiac_dl_0.hdf'] Delhi


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


3212
['20170110T070500_maiac_dl_0.hdf'] Delhi
11619
['20170109T193000_maiac_la_0.hdf'] Los Angeles (SoCAB)


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


7712
['20170111T025500_maiac_tpe_0.hdf', '20170111T025500_maiac_tpe_1.hdf'] Taipei


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


2104
['20170110T201500_maiac_la_0.hdf'] Los Angeles (SoCAB)


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


818
['20170111T025500_maiac_tpe_0.hdf', '20170111T025500_maiac_tpe_1.hdf'] Taipei


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


9923
['20170109T193000_maiac_la_0.hdf'] Los Angeles (SoCAB)


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


1711
['20170109T193000_maiac_la_0.hdf'] Los Angeles (SoCAB)


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


11425
['20170109T030500_maiac_tpe_0.hdf', '20170109T030500_maiac_tpe_1.hdf'] Taipei


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


3758
['20170108T022500_maiac_tpe_0.hdf', '20170108T040000_maiac_tpe_0.hdf'] Taipei


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


6423
['20170109T193000_maiac_la_0.hdf'] Los Angeles (SoCAB)


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


10968
['20170109T193000_maiac_la_0.hdf'] Los Angeles (SoCAB)


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


9046
['20170109T062500_maiac_dl_0.hdf'] Delhi
5222
['20170110T070500_maiac_dl_0.hdf'] Delhi
7370
['20170109T193000_maiac_la_0.hdf'] Los Angeles (SoCAB)


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


540
['20170108T202500_maiac_la_0.hdf'] Los Angeles (SoCAB)
11502
['20170110T201500_maiac_la_0.hdf'] Los Angeles (SoCAB)


  features.append(np.array(collect_features(list_of_all_AOD_arrays, 100, True)))


6198
['20170110T070500_maiac_dl_0.hdf'] Delhi
9467
['20170108T202500_maiac_la_0.hdf'] Los Angeles (SoCAB)
105
['20170108T022500_maiac_tpe_0.hdf', '20170108T040000_maiac_tpe_0.hdf'] Taipei


In [None]:
hdf = SD('test/20170109T193000_maiac_la_0.hdf')
hdf47 = hdf.select(0)
for i in range(4):
    plt.imshow(hdf47.get()[i])
    plt.show()
print(hdf47.get()[0][1199, 1199])