# Assigning Land Cover Types to Stands
Stand layers from different agencies and regions often come with their own unique attributes describing the forest stands they delineate. Several agencies and forests have developed their own coding systems, which can be mapped to the basic cover types we are interested in using in our models:

* 1 = Water
* 2 = Forest
* 3 = Field
* 4 = Barren/Non-vegetated
* 5 = Developed

In [None]:
! pip install -q geopandas rasterio

In [None]:
import os
import glob
import numpy as np
from collections import Counter
import geopandas as gpd
import pandas as pd
import rasterio
from rasterio import windows, transform
from rasterio.plot import reshape_as_image, reshape_as_raster, show
from rasterio import features
from matplotlib import pyplot as plt

from tqdm.notebook import tqdm

In [None]:
# Mount Google Drive in the Colab runtime so the shared-drive data under
# /content/drive is reachable; force_remount=True remounts even if already mounted.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


We'll be classifying the dominant cover type for stands within USGS Quarter Quads we've identified as training tiles where we have good coverage of hand-drawn stand boundaries.

In [None]:
# Sub-directory (per state) that holds the stand layers, and the root of the
# training-tile data on the mounted shared drive.
FILE_TYPE = 'stands'
ROOT_DIR = '/content/drive/Shareddrives/stand_mapping/data/interim/training_tiles'

# Collect the path of every stand GeoJSON found anywhere below
# <ROOT_DIR>/<state>/<FILE_TYPE>, walking sub-directories as well.
paths = []
for state_name in ['oregon', 'washington']:
    for root, dirnames, files in os.walk(os.path.join(ROOT_DIR, state_name, FILE_TYPE)):
        for f in files:
            # Substring test: keeps any file whose name contains '.geojson',
            # not only names that end with it.
            if '.geojson' in f:
                paths.append(os.path.join(root, f))

# Last expression of the cell: displays how many stand files were found.
len(paths)

2020

In [None]:
def parse_stand_path(path_to_file):
    """Pull the cell ID, year, and source out of a stand file name.

    The file name (directory part ignored) is split on underscores:
    the first token is the cell ID, the third token is the source, and
    the last token -- up to its first '.' -- is the year.

    Returns
    -------
    tuple
        ``(cell_id, year, source)`` with ``cell_id`` and ``year`` as ints
        and ``source`` as a string.
    """
    tokens = os.path.basename(path_to_file).split('_')
    cell_id = int(tokens[0])
    # Drop the file extension from the trailing token before converting.
    year = int(tokens[-1].split('.')[0])
    source = tokens[2]
    return cell_id, year, source

In [None]:
# Build one row per stand file: the cell ID, year, and agency parsed from the
# file name, plus the file's full path.
stands_info = [parse_stand_path(path) for path in paths]
stands_df = pd.DataFrame(stands_info, columns=['CELL_ID', 'YEAR', 'AGENCY'])
stands_df['PATH'] = paths
# Label each row with its state based on a substring match against the path
# (rows matching neither keep the empty string).
stands_df['STATE_NAME'] = ''
stands_df.loc[stands_df.PATH.str.contains('oregon'), 'STATE_NAME'] = 'oregon'
stands_df.loc[stands_df.PATH.str.contains('washington'), 'STATE_NAME'] = 'washington'
stands_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2020 entries, 0 to 2019
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   CELL_ID     2020 non-null   int64 
 1   YEAR        2020 non-null   int64 
 2   AGENCY      2020 non-null   object
 3   PATH        2020 non-null   object
 4   STATE_NAME  2020 non-null   object
dtypes: int64(2), object(3)
memory usage: 79.0+ KB


In [None]:
# Show the distinct agency names parsed from the stand file names.
pd.unique(stands_df.AGENCY)

array(['willamette-usfs', 'malheur-usfs', 'wallowa-whitman-usfs',
       'umatilla-usfs', 'mthood-usfs', 'deschutes-usfs', 'blm',
       'colville-usfs', 'okanogan-usfs', 'gp-usfs', 'dnr'], dtype=object)

In [None]:
# Cover-type code -> [R, G, B] color, each component on a 0-1 scale
# (recolor_landcover multiplies by 255 when rendering).
COLOR_MAP = {
    0: [1.0,1.0,1.0], # none, nodata
    1: [0,0,1], # water
    2: [0,0.5,0], # trees
    3: [0.5,1,0.5], # field
    4: [0.5,0.375,0.375], # barren/non-vegetated
    5: [0, 0, 0], # building
    255: [1.0, 0, 0] # NOTE(review): meaning of 255 is not stated -- presumably an unlabeled/ignore value; confirm
}

def recolor_landcover(lc_img, cmap, soft=False, num_classes=None):
    """Render a land cover image as an 8-bit RGB image.

    Parameters
    ----------
    lc_img : numpy array
        When ``soft`` is False, a (rows, cols) array of class codes.
        When ``soft`` is True, a (rows, cols, channels) array holding the
        probability of each class for each pixel; channel ``i`` is colored
        with ``cmap[i + 1]`` (class 0 has no channel).
    cmap : dict
        Maps a class code to ``[R, G, B]`` with components on a 0-1 scale.
    soft : bool
        Whether ``lc_img`` holds per-class probabilities instead of codes.
    num_classes : int, optional
        Only used when ``soft`` is True. Total number of classes counting
        class 0; the first ``num_classes - 1`` channels are blended.
        Defaults to the number of channels in ``lc_img`` plus one, i.e.
        every channel is used.

    Returns
    -------
    numpy array
        (rows, cols, 3) array of dtype uint8.

    Raises
    ------
    KeyError
        If a required class code is not present in ``cmap``.
    """
    cover_colors = np.zeros((lc_img.shape[0], lc_img.shape[1], 3))

    if soft:
        # Previously this branch relied on an undefined name `num_classes`;
        # infer it from the image unless the caller supplies it.
        if num_classes is None:
            num_classes = lc_img.shape[2] + 1
        for i in range(num_classes - 1):
            # Weight the class color by that class's per-pixel probability.
            color = np.asarray(cmap[i + 1], dtype=float)
            cover_colors += lc_img[:, :, i, np.newaxis] * color
    else:
        for cov in np.unique(lc_img):
            cover_colors[lc_img == cov] = cmap[cov]

    return (cover_colors * 255).astype(np.uint8)

In [None]:
# Crosswalk from two-letter vegetation/land-use codes to the cover-type codes
# used in this notebook (0 = no data, 1 = water, 2 = forest, 3 = field,
# 4 = barren/non-vegetated, 5 = developed). Reused below for the
# 'mthood-usfs' and 'gp-usfs' entries of LAND_COVER_CROSSWALKS.
# Entries whose comment starts with '???' are guesses by the original author.
PNW_ECOCLASS = {
        'AB': 5, # Buildings, structures, roads, campgrounds
        'AC': 3, # Cultivated lands
        'AD': 3, # ???
        'AG': 3, # Grasslands: permanent pasture that is maintained in forest, shrub, or desert climates
        'AO': 2, # Orchards: maintained exotic forest stands
        'AQ': 3, # ??? some type of agricultural or administrative area?
        'AR': 3, # Recreation areas; parks, golf courses, or play areas
        'AX': 3, # Administrative or agricultural (no descriptor specified)
        'CA': 2, # Alpine open, forest park
        'CC': 2, # Western red cedar
        'CD': 2, # Douglas-fir
        'CE': 2, # Subalpine fir, Engelmann spruce
        'CF': 2, # Silver or noble fir
        'CH': 2, # Hemlock, western
        'CJ': 2, # Juniper, western
        'CL': 2, # Lodgepole pine
        'CM': 2, # Mountain hemlock
        'CP': 2, # Ponderosa, Jeffrey pine
        'CR': 2, # Red fir
        'CS': 2, # Sitka spruce
        'CW': 2, # Grand or white fir
        'CX': 2, # Coniferous forest (no descriptor specified)
        'FM': 3, # Moist (mesic) forbland
        'FS': 3, # Subalpine or alpine forbland
        'FW': 3, # Wet forbland
        'FX': 3, # Forbland (no descriptor specified)
        'GA': 3, # Annual grasslands
        'GB': 3, # Bunchgrasses
        'GM': 3, # Mesic (forest zone) grasslands
        'GS': 3, # Subalpine or alpine grass or sedge lands
        'GX': 3, # PNC grassland (no descriptor specified)
        'HA': 2, # Alder
        'HB': 2, # Bigleaf maple
        'HC': 2, # Cottonwood - ash bottoms
        'HL': 2, # Canyon live oak tree size
        'HO': 2, # Oak, Oregon or black
        'HQ': 2, # Quaking aspen
        'HT': 2, # Tanoak tree size
        'HX': 2, # Hardwood woodlands or forest (no descriptor specified)
        'MD': 3, # Dry meadow
        'MM': 3, # Moist meadow
        'MS': 3, # Subalpine or alpine wet meadow
        'MT': 1, # Tule, standing water
        'MW': 3, # Wet meadow
        'MX': 3, # Meadow; grass/sedge (no descriptor specified)
        'NA': 4, # ??? some kind of non-vegetated?
        'NC': 4, # Cinders, lava flow, mud flow, glacial wash; continuous disturbance or low site potential precludes vegetation reaching over 10 percent crown cover
        'NF': 4, # Flood plain periodically denuded of vegetation with no foreseeable means of establishing plants
        'NI': 4, # Ice fields, glaciers, perennial snow
        'NL': 4, # Landform failure, natural slumps, avalanches, avalanche trails with little practical means of establishing vegetative cover
        'NM': 4, # Mining tailings, dredging; human-caused disturbances, which has little current vegetation potential
        'NQ': 4, # ??? some kind of non-vegetated area
        'NR': 4, # Rocky lands with too little soil (or no soil) for good vegetative cover
        'NS': 4, # Sand with minimal vegetative cover; shoreline or interior dune
        'NT': 4, # Talus with minimal vegetative potential
        'NX': 4, # Non-vegetated land; less than 10 percent crown cover potential (no descriptor specified)
        None: 0, # no data?
        'SC': 3, # Chaparral
        'SD': 3, # Dry shrubland (sagebrush)
        'SM': 3, # Mesic (forest zone) shrublands
        'SS': 3, # Subalpine to alpine shrublands
        'SW': 3, # Wet shrublands
        'SX': 3, # PNC shrubland (no descriptor specified)
        'WL': 1, # Lakes, ponds, impoundments; perennial or intermittent
        'WR': 1, # Running water bodies, streams, rivers, creeks, ditches; perennial or intermittent
        'WX': 1, # Water-covered areas (no descriptor specified)
    }

# Per-agency crosswalks, keyed by the agency name parsed from the stand file
# names. Each inner dict maps that agency's own cover code to a cover-type code
# (0 = no data, 1 = water, 2 = forest, 3 = field, 4 = barren/non-vegetated,
# 5 = developed). classify_stands looks codes up here via
# LAND_COVER_CROSSWALKS[agency].
# NOTE(review): the agency listing above also shows 'wallowa-whitman-usfs',
# 'deschutes-usfs', 'colville-usfs' and 'okanogan-usfs', which have no entry
# here -- confirm whether those are handled elsewhere.
LAND_COVER_CROSSWALKS = {
    'willamette-usfs': {
        'AG' : 3,  # grassland
        # NOTE(review): PNW_ECOCLASS maps 'CF' to 2 (forest); confirm that 5
        # (developed) is really intended for this agency's LAND_COVER field.
        'CF' : 5,  # silver or noble fir  -- > developed?
        'DX' : 3,  # mixed desert shrub
        'FL' : 2,  # forestland ?
        'FX' : 3,  # forbland
        'GX' : 3,  # grassland
        'NX' : 4,  # non-vegetated
        'QX' : 2,  # ??? Quercus spp?
        'SN' : 4,  # snow/ice
        'SX' : 3,  # shrubland
        'WX' : 1,  # water
        'YX' : 3,  # recent harvest?
        None: 0, # ???
    },
    'malheur-usfs': {
        'FV': 2,  # forest vegetation
        'NF': 3,  # non-forest
        'NV': 4,  # non-vegetated
        None: 0,  # ???
    },
    'umatilla-usfs': {
        'FV': 2,  # forest vegetation
        'NF': 3,  # non-forest
        'NV': 4,  # non-vegetated
        None: 0,  # ???
    },
    # Both of these share the same dict object as PNW_ECOCLASS (no copy is made).
    'mthood-usfs': PNW_ECOCLASS,
    'gp-usfs': PNW_ECOCLASS,
    'blm': {
        'FCO': 2, # Forest - conifers
        'FHD': 2, # Forest - hardwoods
        'FMX': 2, # Forest - mixed conifer and hardwood
        'FNS': 3, # Forest - currently non-stocked
        'NA': 3, # Non-forest - agriculture/range
        'NB': 3, # Non-forest - brush
        'NG': 3, # Non-forest - natural grass
        'NH': 5, # Non-forest - Roads/maintenance facility
        'NO': 5, # Non-forest - cultural development
        'NR': 4, # Non-forest - rock outcrop
        'NU': 3, # Non-forest - utility corridor
        'NW': 1, # Non-forest - water/marsh
        None: 0, # no data
    },
    # Unlike the other agencies, the DNR codes are integers rather than strings.
    'dnr': {
        0: 0, # no data
        10: 5, # built-up land
        15: 5, # ???
        16: 5, # ???
        19: 5, # ???
        21: 3, # Ag.land/non-cult.--Improved pasture land
        22: 3, # Ag.land/non-cult.--Unimproved pasture land
        23: 3, # Ag.land/non-cult.--Nurseries and ornamentals
        24: 4, # Ag.land/non-cult.--Confined feeding operation
        25: 2, # Ag.land/non-cult.--Christmas trees
        31: 2, # Ag.land/cultivated--Apple trees
        32: 2, # Ag.land/cultivated--Pear trees
        33: 2, # Ag.land/cultivated--Soft fruit trees
        34: 3, # Ag.land/cultivated--Berries
        35: 3, # Ag.land/cultivated--Vineyards
        36: 3, # Ag.land/cultivated--Annual crop
        41: 2, # Forest land--With livestock grazing
        42: 2, # Forest land--Without livestock grazing
        43: 2, # Forest land--Wetlands
        44: 2, # ???
        51: 1, # Water--Rivers, streams, and canals
        52: 1, # Water--Lakes
        53: 1, # Water--Reservoirs
        54: 1, # Water--Marine waters
        55: 1, # Water--Non-forested wetlands
        61: 3, # Grass/Shrub--Grass Land
        62: 3, # Grass/Shrub--Shrub Land
        63: 3, # Grass/Shrub--Mixed Grass and Shrub
        70: 4, # Exposed land--Title
        71: 4, # Exposed land--Beaches
        72: 4, # Exposed land--Sandy areas other than beaches
        73: 4, # Exposed land--Bare exposed rock or soil
        74: 4, # Exposed land--Surface mines
        75: 4, # Exposed land--Volcanic mudflow
        76: 4, # Exposed land--Mixed
        80: 4, # Perennial Snow/Ice--Title
        81: 4, # Perennial Snow/Ice--Snowfields
        82: 4, # Perennial Snow/Ice--Glaciers
    }
}

In [None]:
def classify_stands(in_path, agency, field_name, verbose=False, trim=None):
    """Read a stand layer and assign each stand a COVER_TYPE code.

    Parameters
    ----------
    in_path : str
        Path to the stand file, read with ``gpd.read_file``.
    agency : str
        Key into ``LAND_COVER_CROSSWALKS`` selecting the crosswalk to apply.
    field_name : str
        Column holding the agency's cover code. Overridden with
        ``'LAND_COV_CD'`` when ``in_path`` contains ``'stands_dnr_2017'``.
    verbose : bool
        If True, print each code that has no entry in the crosswalk.
    trim : int, optional
        If given, only the first ``trim`` characters of each code are used
        for the crosswalk lookup.

    Returns
    -------
    GeoDataFrame
        The stands with an added integer ``COVER_TYPE`` column; stands with
        a missing code get 0.

    Raises
    ------
    KeyError
        If ``agency`` has no crosswalk.
    AssertionError
        If any stand's code is absent from the crosswalk.
    """
    stands = gpd.read_file(in_path)
    if 'stands_dnr_2017' in in_path:
        field_name = 'LAND_COV_CD'
    crosswalk = LAND_COVER_CROSSWALKS[agency]

    # Sentinel marking stands that have not been classified yet.
    stands['COVER_TYPE'] = -9999

    codes = stands[field_name]
    if trim is not None:
        codes = codes.str[:trim]

    # Treat every kind of missing value (None or NaN) as "no data", not
    # only None, so NaN codes neither break trimming nor stay unclassified.
    is_missing = codes.isna()
    stands.loc[is_missing, 'COVER_TYPE'] = 0

    # Each distinct (trimmed) code is looked up exactly once.
    for code in pd.unique(codes[~is_missing]):
        if code in crosswalk:
            stands.loc[codes == code, 'COVER_TYPE'] = crosswalk[code]
        elif verbose:
            print(f'{code} not in crosswalk.')

    unclassified = stands['COVER_TYPE'] == -9999
    if unclassified.any():
        missing = pd.unique(stands.loc[unclassified, field_name])
        # Raised explicitly (rather than via `assert`) so the check still
        # runs under `python -O`; the exception type is unchanged.
        raise AssertionError(f'Stands in {in_path} missing COVER_TYPE: {missing}')
    return stands

In [None]:
def classify_agency_stands(agency, field_name, overwrite=False, trim=None,
                           raise_on_error=True):
    """Classify every stand tile of one agency and write the results to disk.

    For each row of ``stands_df`` whose AGENCY equals ``agency``, the stand
    file is classified with ``classify_stands`` and written as GeoJSON to the
    same path with ``'/stands/'`` replaced by ``'/stands_self_classified/'``.

    Parameters
    ----------
    agency : str
        Agency name to select from ``stands_df`` and to pass to
        ``classify_stands``.
    field_name : str
        Column holding the agency's cover code.
    overwrite : bool
        If False, tiles whose output file already exists are skipped.
    trim : int, optional
        Passed through to ``classify_stands``.
    raise_on_error : bool
        If True (default, the previous behavior), the first failure is
        re-raised. If False, the failing path is recorded, the 'failed'
        progress bar advances, and processing continues with the next tile.

    Returns
    -------
    list of str
        Paths of the tiles that failed (always empty when ``raise_on_error``
        is True, since the first failure propagates).
    """
    agency_tiles = stands_df.loc[stands_df.AGENCY == agency]

    failed = []
    with tqdm(total=len(agency_tiles), desc='processed') as pbar:
        with tqdm(total=len(agency_tiles), desc='failed') as failed_pbar:
            for path in agency_tiles['PATH']:
                outpath = path.replace('/stands/', '/stands_self_classified/')
                if overwrite or not os.path.exists(outpath):
                    try:
                        stands = classify_stands(path, agency, field_name, trim=trim)
                        # Make sure the output directory exists before writing.
                        out_dir = os.path.dirname(outpath)
                        if out_dir:
                            os.makedirs(out_dir, exist_ok=True)
                        stands.to_file(outpath, driver='GeoJSON')
                    except Exception:
                        if raise_on_error:
                            raise
                        failed.append(path)
                        failed_pbar.update()
                pbar.update()

    return failed

In [None]:
AGENCY = 'willamette-usfs'
FIELD_NAME = 'LAND_COVER'

# Presumably collects the distinct FIELD_NAME codes across this agency's tiles
# (the cell output is a set of codes) so they can be checked against the crosswalk.
# NOTE(review): discover_veg_types is not defined in the visible cells -- confirm
# it is defined before running this cell.
discover_veg_types(AGENCY, FIELD_NAME)

HBox(children=(FloatProgress(value=0.0, max=212.0), HTML(value='')))




{'AG', 'CF', 'DX', 'FL', 'FX', 'GX', 'NX', None, 'QX', 'SN', 'SX', 'WX', 'YX'}

In [None]:
AGENCY = 'willamette-usfs'
FIELD_NAME = 'LAND_COVER'
OVERWRITE = False  # tiles whose output file already exists are skipped

# Classify every tile for this agency and write the results under
# stands_self_classified/.
failed_paths = classify_agency_stands(AGENCY, FIELD_NAME, OVERWRITE)

HBox(children=(FloatProgress(value=0.0, description='processed', max=212.0, style=ProgressStyle(description_wi…

HBox(children=(FloatProgress(value=0.0, description='failed', max=212.0, style=ProgressStyle(description_width…





In [None]:
AGENCY = 'malheur-usfs'
FIELD_NAME = 'VEG_GRP'

# Presumably collects the distinct FIELD_NAME codes across this agency's tiles
# (the cell output is a set of codes).
# NOTE(review): discover_veg_types is not defined in the visible cells -- confirm.
discover_veg_types(AGENCY, FIELD_NAME)

HBox(children=(FloatProgress(value=0.0, max=186.0), HTML(value='')))




{'FV', 'NF', 'NV', None}

In [None]:
AGENCY = 'malheur-usfs'
FIELD_NAME = 'VEG_GRP'
OVERWRITE = True  # existing output files are regenerated

# Classify every tile for this agency and write the results under
# stands_self_classified/.
failed_paths = classify_agency_stands(AGENCY, FIELD_NAME, OVERWRITE)

HBox(children=(FloatProgress(value=0.0, description='processed', max=186.0, style=ProgressStyle(description_wi…

HBox(children=(FloatProgress(value=0.0, description='failed', max=186.0, style=ProgressStyle(description_width…





In [None]:
AGENCY = 'umatilla-usfs'
FIELD_NAME = 'VEG_GRP'

# Presumably collects the distinct FIELD_NAME codes across this agency's tiles
# (the cell output is a set of codes).
# NOTE(review): discover_veg_types is not defined in the visible cells -- confirm.
discover_veg_types(AGENCY, FIELD_NAME)

HBox(children=(FloatProgress(value=0.0, max=131.0), HTML(value='')))




{'FV', 'NF', 'NV'}

In [None]:
AGENCY = 'umatilla-usfs'
FIELD_NAME = 'VEG_GRP'
OVERWRITE = True  # existing output files are regenerated

# Classify every tile for this agency and write the results under
# stands_self_classified/.
failed_paths = classify_agency_stands(AGENCY, FIELD_NAME, OVERWRITE)

HBox(children=(FloatProgress(value=0.0, description='processed', max=131.0, style=ProgressStyle(description_wi…

HBox(children=(FloatProgress(value=0.0, description='failed', max=131.0, style=ProgressStyle(description_width…





In [None]:
AGENCY = 'mthood-usfs'
FIELD_NAME = 'PLNT_ASSOC'

# Presumably collects the distinct FIELD_NAME codes across this agency's tiles
# (the cell output is a set of two-letter codes).
# NOTE(review): discover_veg_types is not defined in the visible cells -- confirm.
discover_veg_types(AGENCY, FIELD_NAME)

HBox(children=(FloatProgress(value=0.0, max=126.0), HTML(value='')))




{'AB',
 'AG',
 'AR',
 'AX',
 'CX',
 'FM',
 'FW',
 'GB',
 'GM',
 'GS',
 'GX',
 'HX',
 'MD',
 'MM',
 'MS',
 'MT',
 'MX',
 'NC',
 'NI',
 'NM',
 'NR',
 'NS',
 'NT',
 'NX',
 None,
 'SC',
 'SM',
 'SS',
 'SW',
 'SX',
 'WL',
 'WR',
 'WX'}

In [None]:
AGENCY = 'mthood-usfs'
FIELD_NAME = 'PLNT_ASSOC'
OVERWRITE = False  # tiles whose output file already exists are skipped

# Classify every tile for this agency (crosswalk: PNW_ECOCLASS) and write the
# results under stands_self_classified/.
failed_paths = classify_agency_stands(AGENCY, FIELD_NAME, OVERWRITE)

HBox(children=(FloatProgress(value=0.0, description='processed', max=126.0, style=ProgressStyle(description_wi…

HBox(children=(FloatProgress(value=0.0, description='failed', max=126.0, style=ProgressStyle(description_width…





In [None]:
AGENCY = 'gp-usfs'
FIELD_NAME = 'ECOCLASS'

# Presumably collects the distinct FIELD_NAME codes across this agency's tiles,
# cut to their first two characters by trim=2 (the cell output is a set of
# two-letter codes).
# NOTE(review): discover_veg_types is not defined in the visible cells -- confirm.
discover_veg_types(AGENCY, FIELD_NAME, trim=2)

HBox(children=(FloatProgress(value=0.0, max=170.0), HTML(value='')))




{'AB',
 'AC',
 'AD',
 'AG',
 'AO',
 'AQ',
 'AR',
 'AX',
 'CA',
 'CC',
 'CD',
 'CE',
 'CF',
 'CH',
 'CL',
 'CM',
 'CW',
 'FS',
 'GS',
 'HA',
 'HB',
 'HC',
 'HO',
 'MD',
 'MM',
 'MS',
 'MW',
 'NA',
 'NC',
 'NF',
 'NI',
 'NL',
 'NM',
 'NQ',
 'NR',
 'NT',
 'NX',
 None,
 'SD',
 'SM',
 'SS',
 'SW',
 'SX',
 'WL',
 'WR',
 'WX'}

In [None]:
AGENCY = 'gp-usfs'
FIELD_NAME = 'ECOCLASS'
OVERWRITE = False  # tiles whose output file already exists are skipped

# Classify every tile for this agency, matching only the first two characters
# of each ECOCLASS value against the crosswalk (PNW_ECOCLASS), and write the
# results under stands_self_classified/.
failed_paths = classify_agency_stands(AGENCY, FIELD_NAME, OVERWRITE, trim=2)

HBox(children=(FloatProgress(value=0.0, description='processed', max=170.0, style=ProgressStyle(description_wi…

HBox(children=(FloatProgress(value=0.0, description='failed', max=170.0, style=ProgressStyle(description_width…





In [None]:
AGENCY = 'blm'
FIELD_NAME = 'COVRTYPE'

# Presumably collects the distinct FIELD_NAME codes across this agency's tiles
# (the cell output is a set of codes).
# NOTE(review): discover_veg_types is not defined in the visible cells -- confirm.
discover_veg_types(AGENCY, FIELD_NAME)

HBox(children=(FloatProgress(value=0.0, max=73.0), HTML(value='')))




{'FCO',
 'FHD',
 'FMX',
 'FNS',
 'NA',
 'NB',
 'NG',
 'NH',
 'NO',
 'NR',
 'NU',
 'NW',
 None}

In [None]:
AGENCY = 'blm'
FIELD_NAME = 'COVRTYPE'
OVERWRITE = False  # tiles whose output file already exists are skipped

# Classify every tile for this agency and write the results under
# stands_self_classified/.
failed_paths = classify_agency_stands(AGENCY, FIELD_NAME, OVERWRITE)

HBox(children=(FloatProgress(value=0.0, description='processed', max=73.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='failed', max=73.0, style=ProgressStyle(description_width=…





In [None]:
AGENCY = 'dnr'
FIELD_NAME = 'LAND_COV'

# Presumably collects the distinct FIELD_NAME codes across this agency's tiles
# (the cell output is a set of integer codes).
# NOTE(review): discover_veg_types is not defined in the visible cells -- confirm.
discover_veg_types(AGENCY, FIELD_NAME)

HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))




{0,
 10,
 15,
 16,
 19,
 22,
 23,
 25,
 36,
 41,
 42,
 43,
 44,
 51,
 52,
 53,
 55,
 61,
 62,
 63,
 71,
 72,
 73,
 74,
 75,
 76}

In [None]:
AGENCY = 'dnr'
FIELD_NAME = 'LAND_COV'
OVERWRITE = False  # tiles whose output file already exists are skipped

# Classify every tile for this agency and write the results under
# stands_self_classified/. For paths containing 'stands_dnr_2017',
# classify_stands reads 'LAND_COV_CD' instead of FIELD_NAME.
failed_paths = classify_agency_stands(AGENCY, FIELD_NAME, OVERWRITE)

HBox(children=(FloatProgress(value=0.0, description='processed', max=549.0, style=ProgressStyle(description_wi…

HBox(children=(FloatProgress(value=0.0, description='failed', max=549.0, style=ProgressStyle(description_width…



