# Trim NAIP Tiles
In this notebook, we'll trim each NAIP tile to its desired bounding box so that every tile is consistent with the other data layers. Each tile is overwritten in place.


In [None]:
! pip install -q geopandas rasterio

[K     |████████████████████████████████| 972kB 7.8MB/s 
[K     |████████████████████████████████| 18.3MB 249kB/s 
[K     |████████████████████████████████| 14.8MB 329kB/s 
[K     |████████████████████████████████| 6.5MB 47.4MB/s 
[?25h

In [None]:
import os
import numpy as np
import geopandas as gpd
import rasterio
from rasterio import windows, transform
from tqdm.notebook import tqdm

In [None]:
# Mount Google Drive so that the Shareddrives paths used below are reachable
# under /content/drive.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


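We'll be classifying the dominant cover type for stands within the USGS Quarter Quads we've identified as training tiles, where we have good coverage of hand-drawn stand boundaries. The shapefiles loaded below hold the geometries of those training quads, with one file per state and UTM zone.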

In [None]:
# Directory holding the training-quad shapefiles.
SHP_DIR = '/content/drive/Shareddrives/stand_mapping/data/interim'

# One shapefile per state and UTM zone.
WA11_SHP = 'washington_utm11n_training_quads_epsg6340.shp'
WA10_SHP = 'washington_utm10n_training_quads_epsg6339.shp'
OR10_SHP = 'oregon_utm10n_training_quads_epsg6339.shp'
OR11_SHP = 'oregon_utm11n_training_quads_epsg6340.shp'

# Read the four layers in a single pass (same order as the names on the left).
or10_gdf, or11_gdf, wa10_gdf, wa11_gdf = (
    gpd.read_file(os.path.join(SHP_DIR, shp_name))
    for shp_name in (OR10_SHP, OR11_SHP, WA10_SHP, WA11_SHP)
)

In [None]:
def get_tile_bbox(geometry):
    """Returns the geometry's bounds snapped outward to whole numbers.

    The lower-left corner is floored and the upper-right corner is ceiled, so
    the returned ``(xmin, ymin, xmax, ymax)`` tuple of integers always contains
    ``geometry.bounds``.
    """
    left, bottom, right, top = geometry.bounds
    lower_left = np.floor((left, bottom)).astype(int)
    upper_right = np.ceil((right, top)).astype(int)
    return (lower_left[0], lower_left[1], upper_right[0], upper_right[1])

def get_naip_path(cell_id, state_name, year):
    """Builds the path of the NAIP ``.tif`` file for one tile, state and year."""
    tile_dir = f'/content/drive/Shareddrives/stand_mapping/data/processed/training_tiles/{state_name}/naip/{year}'
    return os.path.join(tile_dir, f'{cell_id}_naip_{year}.tif')

In [None]:
def trim_naip(tile_gdf, cell_id, state_name, year):
    """Crops a NAIP tile on disk to the integer bounding box of its tile.

    The file located by ``get_naip_path`` is read through a window covering
    the bounding box that ``get_tile_bbox`` returns for the tile's geometry.
    The file is then replaced by the windowed data, with a transform that
    spans exactly that bounding box.

    Parameters
    ----------
    tile_gdf : GeoDataFrame
        Must provide ``CELL_ID`` and ``geometry`` columns.
    cell_id
        Value looked up in ``tile_gdf.CELL_ID``; the first matching row is used.
    state_name : str
        State component of the tile's path.
    year : int
        Year component of the tile's path and file name.

    Raises
    ------
    IndexError
        If no row of ``tile_gdf`` has the requested ``cell_id``.
    """
    path = get_naip_path(cell_id, state_name, year)

    matches = tile_gdf.loc[tile_gdf.CELL_ID == cell_id, 'geometry']
    if matches.empty:
        raise IndexError(f'CELL_ID {cell_id} not found in tile_gdf')

    xmin, ymin, xmax, ymax = get_tile_bbox(matches.iloc[0])
    # NOTE(review): the bbox extents (in CRS units) are used directly as pixel
    # counts, which presumes one-unit pixels in the source raster — confirm.
    width, height = xmax - xmin, ymax - ymin

    with rasterio.open(path) as src:
        # Only the bounds and the source transform are needed to locate the
        # window; the optional height/width arguments are not passed.
        window = windows.from_bounds(xmin, ymin, xmax, ymax,
                                     transform=src.transform)
        data = src.read(window=window)
        new_profile = src.profile.copy()

    new_trf = transform.from_bounds(xmin, ymin, xmax, ymax,
                                    width=width, height=height)
    new_profile.update(transform=new_trf, width=width, height=height)

    # Write to a sibling temporary file and swap it in afterwards, so that a
    # failed or interrupted write does not leave a truncated tile at `path`.
    tmp_path = f'{path}.tmp'
    try:
        with rasterio.open(tmp_path, 'w', **new_profile) as dst:
            dst.write(data)
        os.replace(tmp_path, path)
    finally:
        # After a successful replace the temporary file no longer exists.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

In [None]:
# Trim a single tile for a single year — presumably a spot check before the
# batch runs below.
trim_naip(wa11_gdf, 116470, 'washington', 2009)

In [None]:
GDF = or11_gdf
STATE_NAME = 'oregon'
YEARS = [2009, 2011, 2014, 2016]

# Trim every tile listed in GDF for each year, showing one progress bar per year.
for year in YEARS:
    for cell_id in tqdm(GDF['CELL_ID'], total=len(GDF), desc=str(year)):
        trim_naip(GDF, cell_id, STATE_NAME, year)

HBox(children=(FloatProgress(value=0.0, description='2009', max=524.0, style=ProgressStyle(description_width='…




HBox(children=(FloatProgress(value=0.0, description='2011', max=524.0, style=ProgressStyle(description_width='…




HBox(children=(FloatProgress(value=0.0, description='2014', max=524.0, style=ProgressStyle(description_width='…




HBox(children=(FloatProgress(value=0.0, description='2016', max=524.0, style=ProgressStyle(description_width='…




In [None]:
GDF = or10_gdf
STATE_NAME = 'oregon'
YEARS = [2009, 2011, 2014, 2016]

# Trim every tile listed in GDF for each year, showing one progress bar per year.
for year in YEARS:
    with tqdm(total=len(GDF), desc=str(year)) as pbar:
        # Iterate over GDF itself: `GDF_` is never assigned in the visible
        # notebook, so referencing it raises NameError on a fresh kernel.
        for idx, row in GDF.iterrows():
            cell_id = row['CELL_ID']
            trim_naip(GDF, cell_id, STATE_NAME, year)
            pbar.update()

HBox(children=(FloatProgress(value=0.0, description='2009', max=607.0, style=ProgressStyle(description_width='…




HBox(children=(FloatProgress(value=0.0, description='2011', max=607.0, style=ProgressStyle(description_width='…




HBox(children=(FloatProgress(value=0.0, description='2014', max=607.0, style=ProgressStyle(description_width='…




HBox(children=(FloatProgress(value=0.0, description='2016', max=607.0, style=ProgressStyle(description_width='…




In [None]:
GDF = wa10_gdf
STATE_NAME = 'washington'
YEARS = [2009, 2011, 2015, 2017]

# Trim every tile listed in GDF for each year, showing one progress bar per year.
for year in YEARS:
    for cell_id in tqdm(GDF['CELL_ID'], total=len(GDF), desc=str(year)):
        trim_naip(GDF, cell_id, STATE_NAME, year)

HBox(children=(FloatProgress(value=0.0, description='2009', max=277.0, style=ProgressStyle(description_width='…




HBox(children=(FloatProgress(value=0.0, description='2011', max=277.0, style=ProgressStyle(description_width='…




HBox(children=(FloatProgress(value=0.0, description='2015', max=277.0, style=ProgressStyle(description_width='…




HBox(children=(FloatProgress(value=0.0, description='2017', max=277.0, style=ProgressStyle(description_width='…




In [None]:
# NOTE(review): only rows from position 62 onward are processed here —
# presumably resuming an interrupted run. Confirm that the first 62 tiles were
# already trimmed before relying on a fresh Restart & Run All.
GDF = wa11_gdf.iloc[62:]
STATE_NAME = 'washington'
YEARS = [2009, 2011, 2015, 2017]

# Trim every tile listed in GDF for each year, showing one progress bar per year.
for year in YEARS:
    for cell_id in tqdm(GDF['CELL_ID'], total=len(GDF), desc=str(year)):
        trim_naip(GDF, cell_id, STATE_NAME, year)

HBox(children=(FloatProgress(value=0.0, description='2017', max=20.0, style=ProgressStyle(description_width='i…


