In [2]:
! pip install geopandas -q

[K     |████████████████████████████████| 972kB 2.8MB/s 
[K     |████████████████████████████████| 10.9MB 20.9MB/s 
[K     |████████████████████████████████| 14.7MB 302kB/s 
[?25h

In [3]:
import ee
import os
import geopandas as gpd

## Get Earth Engine Running
To access GEE, we will need to authenticate our account, and then initialize a connection to a server. 

In [None]:
# Authenticate our account with Earth Engine; run this before initializing.
ee.Authenticate()

In [5]:
# Initialize the connection to the Earth Engine server for this session.
ee.Initialize()

# Mount Google Drive
Shapefiles stored on our Google Drive contain the geospatial boundaries of the map tiles we'll be gathering imagery for.

To mount our Drive and access our files, we have to authenticate with Google Drive first.

In [6]:
# Mount Google Drive under /content/drive so the tile shapefiles can be read
# from the notebook's filesystem. force_remount=True is passed so the mount is
# redone even if one already exists (NOTE(review): confirm this is intended).
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


# Retrieve NAIP Quads
For each of the tiles in a GeoDataFrame, and each year for which we want to get the NAIP imagery, we will filter the NAIP collection from GEE to identify the image that includes the centroid of our tile. We will then export that NAIP image to our Google Drive.

In [7]:
# Handle to the NAIP imagery collection on Earth Engine; the export cells
# below filter it by tile location and year.
# The tile GeoDataFrame is intentionally NOT loaded here: WORK_DIR and
# TILE_SHP are only defined in the configuration cells further down, and each
# export cell reads its own shapefile right before looping over it.
naip = ee.ImageCollection('USDA/NAIP/DOQQ')

Loop through each feature in the GeoDataFrame and submit a task on the Earth Engine server to export the corresponding NAIP image(s).

In [10]:
# --- Run configuration: Oregon, UTM zone 11N training tiles ---
# State abbreviation and acquisition years; both are used to name the
# Drive export folders (naip_tiles-<STATE>-<year>).
STATE = 'OR'
YEARS = [2009, 2011, 2012, 2014, 2016]

# Folder on the mounted shared drive that holds the tile shapefiles,
# and the shapefile to process in this run.
WORK_DIR = ('/content/drive/Shared drives/stand_mapping'
            '/data/processed/training_tiles')
TILE_SHP = 'oregon_utm11n_training_quads_epsg6340.shp'

In [12]:
# Submit one Earth Engine export task per NAIP image that covers a tile
# centroid, for every tile in the shapefile and every year in YEARS.
# Images whose GeoTIFF already exists in the Drive export folder are skipped.
gdf = gpd.read_file(os.path.join(WORK_DIR, TILE_SHP))

# Submitted export tasks, keyed by NAIP image id. Keying by the row index
# alone would overwrite entries across years (and across several images
# matched by one tile), losing track of previously submitted tasks.
tasks = {}

# the CRS is the same for every row, so build the projection string once
proj = f'EPSG:{gdf.crs.to_epsg()}'

print('Retrieving images for {:,d} features in GeoDataFrame'.format(len(gdf)))
for year in YEARS:
    folder = f'naip_tiles-{STATE}-{year}'
    print('\n\n', year)
    for idx, row in gdf.iterrows():
        # bounding box of a 1-unit buffer (in the shapefile's CRS units)
        # around the tile centroid; used to find the NAIP image(s) it falls in
        bbox = row['geometry'].centroid.buffer(1).bounds
        geom = ee.Geometry.Rectangle(bbox, proj=proj, evenOdd=True, geodesic=False)
        # the end date of filterDate is exclusive, so stop at Jan 1 of the
        # following year to keep images acquired on Dec 31
        coll = naip.filterBounds(geom).filterDate(f'{year}-01-01', f'{year + 1}-01-01')
        img_list = coll.toList(coll.size())

        num_images = coll.size().getInfo()
        if num_images == 0:
            print('\n', f'CELL_ID {row.CELL_ID} matches no NAIP tiles')
        elif num_images > 1:
            print('\n', f'CELL_ID {row.CELL_ID} matches {num_images} NAIP tiles')

        for i in range(num_images):
            img = ee.Image(img_list.get(i))

            try:
                name = img.id().getInfo()
            except ee.EEException:  # the image id could not be retrieved
                print('\n',
                      f'CELL_ID {row.CELL_ID} erred on NAIP tile #{i}')
                continue  # on to next image in img_list

            # skip images already queued in this run or already on our Drive
            if name in tasks:
                continue
            if os.path.exists(
                    os.path.join('/content/drive/My Drive', folder, name + '.tif')):
                continue

            # submit a task to the server to export the image to our Drive
            task = ee.batch.Export.image.toDrive(img,
                                                 description=name,
                                                 maxPixels=125e6,
                                                 folder=folder)
            task.start()
            tasks[name] = task  # keep track of our tasks in a dictionary

        # report progress: the index every 10th row, a dot otherwise, and a
        # line break every 100 rows (idx is the GeoDataFrame index label)
        if idx % 100 == 0 and idx > 0:
            print()
        if idx % 10 == 0:
            print(idx, end='')
        else:
            print('.', end='')

Retrieving images for 524 features in GeoDataFrame


 2009
0.........10.........20.........30.........40.....
 CELL_ID 224665 matches 2 NAIP tiles
....50.........60.........70.........80.........90.........
100.........110.........120.........130.........140.........150.........160.........170
 CELL_ID 130202 matches 2 NAIP tiles
.........180.........190.........
200.........210.........220.........230......
 CELL_ID 176776 matches 2 NAIP tiles
...240..
 CELL_ID 201528 matches 2 NAIP tiles
.......250.........260.........270.........280.........290.........
300.........310.........320.
 CELL_ID 265271 matches 2 NAIP tiles
.....
 CELL_ID 259170 matches 2 NAIP tiles
...330..
 CELL_ID 277967 matches 2 NAIP tiles
.......340.........350.........360.........370.........380.........390.........
400.........410.........420.........430........
 CELL_ID 156185 matches 2 NAIP tiles
.
 CELL_ID 156187 matches 2 NAIP tiles
440.........450.........460.....
 CELL_ID 155353 matches 2 NAIP tiles
....470.

In [1]:
# --- Run configuration: Oregon, UTM zone 10N training tiles ---
# State abbreviation and acquisition years; both are used to name the
# Drive export folders (naip_tiles-<STATE>-<year>).
STATE = 'OR'
# 2009, 2011 and 2012 are deliberately left out of this run.
# NOTE(review): presumably those years were already submitted -- confirm.
YEARS = [2014, 2016]

# Folder on the mounted shared drive that holds the tile shapefiles,
# and the shapefile to process in this run.
WORK_DIR = ('/content/drive/Shared drives/stand_mapping'
            '/data/processed/training_tiles')
TILE_SHP = 'oregon_utm10n_training_quads_epsg6339.shp'

In [14]:
# Submit one Earth Engine export task per NAIP image that covers a tile
# centroid, for every tile in the shapefile and every year in YEARS.
# Images whose GeoTIFF already exists in the Drive export folder are skipped.
gdf = gpd.read_file(os.path.join(WORK_DIR, TILE_SHP))

# Submitted export tasks, keyed by NAIP image id. Keying by the row index
# alone would overwrite entries across years (and across several images
# matched by one tile), losing track of previously submitted tasks.
tasks = {}

# the CRS is the same for every row, so build the projection string once
proj = f'EPSG:{gdf.crs.to_epsg()}'

print('Retrieving images for {:,d} features in GeoDataFrame'.format(len(gdf)))
for year in YEARS:
    folder = f'naip_tiles-{STATE}-{year}'
    print('\n\n', year)
    for idx, row in gdf.iterrows():
        # bounding box of a 1-unit buffer (in the shapefile's CRS units)
        # around the tile centroid; used to find the NAIP image(s) it falls in
        bbox = row['geometry'].centroid.buffer(1).bounds
        geom = ee.Geometry.Rectangle(bbox, proj=proj, evenOdd=True, geodesic=False)
        # the end date of filterDate is exclusive, so stop at Jan 1 of the
        # following year to keep images acquired on Dec 31
        coll = naip.filterBounds(geom).filterDate(f'{year}-01-01', f'{year + 1}-01-01')
        img_list = coll.toList(coll.size())

        num_images = coll.size().getInfo()
        if num_images == 0:
            print('\n', f'CELL_ID {row.CELL_ID} matches no NAIP tiles')
        elif num_images > 1:
            print('\n', f'CELL_ID {row.CELL_ID} matches {num_images} NAIP tiles')

        for i in range(num_images):
            img = ee.Image(img_list.get(i))

            try:
                name = img.id().getInfo()
            except ee.EEException:  # the image id could not be retrieved
                print('\n',
                      f'CELL_ID {row.CELL_ID} erred on NAIP tile #{i}')
                continue  # on to next image in img_list

            # skip images already queued in this run or already on our Drive
            if name in tasks:
                continue
            if os.path.exists(
                    os.path.join('/content/drive/My Drive', folder, name + '.tif')):
                continue

            # submit a task to the server to export the image to our Drive
            task = ee.batch.Export.image.toDrive(img,
                                                 description=name,
                                                 maxPixels=125e6,
                                                 folder=folder)
            task.start()
            tasks[name] = task  # keep track of our tasks in a dictionary

        # report progress: the index every 10th row, a dot otherwise, and a
        # line break every 100 rows (idx is the GeoDataFrame index label)
        if idx % 100 == 0 and idx > 0:
            print()
        if idx % 10 == 0:
            print(idx, end='')
        else:
            print('.', end='')

Retrieving images for 607 features in GeoDataFrame


 2009
0.........10.........20.........30.........40.........50.........60.........70.........80.........90.........
100.........110.........120..
 CELL_ID 254524 matches 2 NAIP tiles
.......130.........140.........150.........160.........170.........180.........190.........
200.........210.........220.........230.........240.........250.........260.........270.........280.........290.........
300.........310.........320.........330.........340.........350.........360.........370.
 CELL_ID 256194 matches 2 NAIP tiles
..
 CELL_ID 264469 matches 2 NAIP tiles
......380.........390.........
400.........410.........420.........430.........440.........450.........460.........470.........480.........490.........
500.........510.........520.........530.........540.........550.........560.........570.........580.........590.........
600......

 2011
0.........10.........20.........30.........40.........50.........60.........70.........80......

In [15]:
# --- Run configuration: Washington, UTM zone 11N training tiles ---
# State abbreviation and acquisition years; both are used to name the
# Drive export folders (naip_tiles-<STATE>-<year>).
STATE = 'WA'
YEARS = [2009, 2011, 2013, 2015, 2017]

# Folder on the mounted shared drive that holds the tile shapefiles,
# and the shapefile to process in this run.
WORK_DIR = ('/content/drive/Shared drives/stand_mapping'
            '/data/processed/training_tiles')
TILE_SHP = 'washington_utm11n_training_quads_epsg6340.shp'

In [16]:
# Submit one Earth Engine export task per NAIP image that covers a tile
# centroid, for every tile in the shapefile and every year in YEARS.
# Images whose GeoTIFF already exists in the Drive export folder are skipped.
gdf = gpd.read_file(os.path.join(WORK_DIR, TILE_SHP))

# Submitted export tasks, keyed by NAIP image id. Keying by the row index
# alone would overwrite entries across years (and across several images
# matched by one tile), losing track of previously submitted tasks.
tasks = {}

# the CRS is the same for every row, so build the projection string once
proj = f'EPSG:{gdf.crs.to_epsg()}'

print('Retrieving images for {:,d} features in GeoDataFrame'.format(len(gdf)))
for year in YEARS:
    folder = f'naip_tiles-{STATE}-{year}'
    print('\n\n', year)
    for idx, row in gdf.iterrows():
        # bounding box of a 1-unit buffer (in the shapefile's CRS units)
        # around the tile centroid; used to find the NAIP image(s) it falls in
        bbox = row['geometry'].centroid.buffer(1).bounds
        geom = ee.Geometry.Rectangle(bbox, proj=proj, evenOdd=True, geodesic=False)
        # the end date of filterDate is exclusive, so stop at Jan 1 of the
        # following year to keep images acquired on Dec 31
        coll = naip.filterBounds(geom).filterDate(f'{year}-01-01', f'{year + 1}-01-01')
        img_list = coll.toList(coll.size())

        num_images = coll.size().getInfo()
        if num_images == 0:
            print('\n', f'CELL_ID {row.CELL_ID} matches no NAIP tiles')
        elif num_images > 1:
            print('\n', f'CELL_ID {row.CELL_ID} matches {num_images} NAIP tiles')

        for i in range(num_images):
            img = ee.Image(img_list.get(i))

            try:
                name = img.id().getInfo()
            except ee.EEException:  # the image id could not be retrieved
                print('\n',
                      f'CELL_ID {row.CELL_ID} erred on NAIP tile #{i}')
                continue  # on to next image in img_list

            # skip images already queued in this run or already on our Drive
            if name in tasks:
                continue
            if os.path.exists(
                    os.path.join('/content/drive/My Drive', folder, name + '.tif')):
                continue

            # submit a task to the server to export the image to our Drive
            task = ee.batch.Export.image.toDrive(img,
                                                 description=name,
                                                 maxPixels=125e6,
                                                 folder=folder)
            task.start()
            tasks[name] = task  # keep track of our tasks in a dictionary

        # report progress: the index every 10th row, a dot otherwise, and a
        # line break every 100 rows (idx is the GeoDataFrame index label)
        if idx % 100 == 0 and idx > 0:
            print()
        if idx % 10 == 0:
            print(idx, end='')
        else:
            print('.', end='')

Retrieving images for 82 features in GeoDataFrame


 2009
0.........10.........20.........30.........40.........50.........60.........70.........80.

 2011
0.........10.........20.........30.........40.........50.........60.........70.........80.

 2013
0.........10.........20.........30.........40.........50.........60.........70.........80.

 2015
0.........10.........20.........30.........40.........50.........60.........70.........80.

 2017
0.........10.........20.........30.........40.........50.........60.........70.........80.

In [17]:
# --- Run configuration: Washington, UTM zone 10N training tiles ---
# State abbreviation and acquisition years; both are used to name the
# Drive export folders (naip_tiles-<STATE>-<year>).
STATE = 'WA'
YEARS = [2009, 2011, 2013, 2015, 2017]

# Folder on the mounted shared drive that holds the tile shapefiles,
# and the shapefile to process in this run.
WORK_DIR = ('/content/drive/Shared drives/stand_mapping'
            '/data/processed/training_tiles')
TILE_SHP = 'washington_utm10n_training_quads_epsg6339.shp'

In [18]:
# Submit one Earth Engine export task per NAIP image that covers a tile
# centroid, for every tile in the shapefile and every year in YEARS.
# Images whose GeoTIFF already exists in the Drive export folder are skipped.
gdf = gpd.read_file(os.path.join(WORK_DIR, TILE_SHP))

# Submitted export tasks, keyed by NAIP image id. Keying by the row index
# alone would overwrite entries across years (and across several images
# matched by one tile), losing track of previously submitted tasks.
tasks = {}

# the CRS is the same for every row, so build the projection string once
proj = f'EPSG:{gdf.crs.to_epsg()}'

print('Retrieving images for {:,d} features in GeoDataFrame'.format(len(gdf)))
for year in YEARS:
    folder = f'naip_tiles-{STATE}-{year}'
    print('\n\n', year)
    for idx, row in gdf.iterrows():
        # bounding box of a 1-unit buffer (in the shapefile's CRS units)
        # around the tile centroid; used to find the NAIP image(s) it falls in
        bbox = row['geometry'].centroid.buffer(1).bounds
        geom = ee.Geometry.Rectangle(bbox, proj=proj, evenOdd=True, geodesic=False)
        # the end date of filterDate is exclusive, so stop at Jan 1 of the
        # following year to keep images acquired on Dec 31
        coll = naip.filterBounds(geom).filterDate(f'{year}-01-01', f'{year + 1}-01-01')
        img_list = coll.toList(coll.size())

        num_images = coll.size().getInfo()
        if num_images == 0:
            print('\n', f'CELL_ID {row.CELL_ID} matches no NAIP tiles')
        elif num_images > 1:
            print('\n', f'CELL_ID {row.CELL_ID} matches {num_images} NAIP tiles')

        for i in range(num_images):
            img = ee.Image(img_list.get(i))

            try:
                name = img.id().getInfo()
            except ee.EEException:  # the image id could not be retrieved
                print('\n',
                      f'CELL_ID {row.CELL_ID} erred on NAIP tile #{i}')
                continue  # on to next image in img_list

            # skip images already queued in this run or already on our Drive
            if name in tasks:
                continue
            if os.path.exists(
                    os.path.join('/content/drive/My Drive', folder, name + '.tif')):
                continue

            # submit a task to the server to export the image to our Drive
            task = ee.batch.Export.image.toDrive(img,
                                                 description=name,
                                                 maxPixels=125e6,
                                                 folder=folder)
            task.start()
            tasks[name] = task  # keep track of our tasks in a dictionary

        # report progress: the index every 10th row, a dot otherwise, and a
        # line break every 100 rows (idx is the GeoDataFrame index label)
        if idx % 100 == 0 and idx > 0:
            print()
        if idx % 10 == 0:
            print(idx, end='')
        else:
            print('.', end='')

Retrieving images for 277 features in GeoDataFrame


 2009
0.........10.........20
 CELL_ID 215589 matches 2 NAIP tiles
.
 CELL_ID 215591 matches 2 NAIP tiles
........30.........40.........50.........60.........70.........80.........90.........
100....
 CELL_ID 133123 matches 2 NAIP tiles
.....110.........120.........130.........140.........150.........160.........170.........180.........190.........
200.........210.........220.........230.........240.........250.........260.........270......

 2011
0.........10.........20
 CELL_ID 215589 matches 2 NAIP tiles
.
 CELL_ID 215591 matches 2 NAIP tiles
........30.........40.........50.........60.........70.........80.........90.........
100....
 CELL_ID 133123 matches 2 NAIP tiles
.....110.........120.........130.........140.........150.........160.........170.........180.........190.........
200.........210.........220.........230.........240.........250.........260.........270......

 2013
0.........10.........20.........30
 CELL_ID 30523

## Missing NAIP Tiles
| STATE | CELL_ID | Year |
|:-----:|:-------:|:----:|
|WA|305230|2013|
|WA|305232|2013|
|WA|305229|2013|

## Extra NAIP Tiles
| STATE | CELL_ID | Year | # Tiles |
|:-----:|:-------:|:----:|:-------:|
|WA|133123|2009, 2011|2|
|WA|215589|2009, 2011|2|
|WA|215591|2009, 2011|2|
|OR|130202|2009, 2011|2|
|OR|156185|2009, 2011|2|
|OR|156187|2009, 2011|2| 
|OR|155353|2009, 2011|2|
|OR|176776|2009, 2011|2| 
|OR|201528|2009, 2011|2|
|OR|224665|2009, 2011|2|
|OR|254524|2009, 2012|2|
|OR|254524|2014, 2016|2|
|OR|256194|2009, 2012, 2014, 2016|2|
|OR|259170|2009, 2011|2| 
|OR|264469|2009, 2011|2|
|OR|265271|2009, 2011|2|
|OR|277967|2009, 2011|2|

In [None]:
import glob

# Count the GeoTIFFs present in the Washington 2017 export folder on Drive;
# the bare last expression displays the count as the cell output.
tif_pattern = '/content/drive/My Drive/naip_tiles-WA-2017' + '/*.tif'
processed = glob.glob(tif_pattern)
len(processed)

277