# Compare GLIMS and RGI Data
Author: Ann Windnagel

Date: 3/10/19

This notebook compares the GLIMS and RGI glacier outline data by finding the 10 largest glaciers in each of the 19 world glacier regions and saving them to one CSV file per region and data source.

## Import packages

In [1]:
import os
import os.path as op
import sys
import pandas as pd
import matplotlib.pyplot as plt
import geopandas as gpd

# Run from the project checkout so the relative "data/..." paths used below resolve
HOME = op.expanduser("~")
os.chdir(op.join(HOME, "git/wgms-glacier-project"))

# Extend the module search path before importing the project scripts
# NOTE(review): this is evaluated after the chdir above, so '..' is the parent
# of the project directory - confirm that is the intended sys.path entry
module_path = op.abspath('..')
if module_path not in sys.path:
    sys.path.append(module_path)
    
import scripts.wgms_scripts as ws

## Function Definition

In [2]:
def ten_largest(data, region_no, source):
    '''
    Finds the 10 largest glaciers in a region and saves them to a csv file

    The csv file is only written when it does not already exist; an existing
    file is left untouched. The output directory is created when it is missing.

    Parameters
    ----------
    data : Geodataframe (or DataFrame) containing all glacier polygons for a region.
        It must have the columns 'glac_id', 'db_area', 'glac_name' and 'src_date'
        when source is GLIMS, or 'GLIMSId', 'Area', 'Name' and 'BgnDate' when
        source is RGI.
    region_no : Integer with the region number, used to build the csv file name.
        Expected values are 1 through 19 (not validated).
    source :  String with the source of the glacier outlines. Accepted values are GLIMS or RGI.
        Any other value prints "Incorrect source input" and nothing is saved.

    Returns
    ----------
    nothing: Saves a csv file of the 10 largest glaciers for a region to
        data/glims/processed/largest/glims_region_<region_no>_largest.csv or
        data/rgi/processed/largest/rgi_region_<region_no>_largest.csv,
        relative to the current working directory
    '''

    # Pick the source-specific column names and output file prefix; everything
    # after this is shared between the two sources
    if source == 'GLIMS':
        columns = ['glac_id', 'db_area', 'glac_name', 'src_date']
        area_column = 'db_area'
        csv_prefix = "data/glims/processed/largest/glims_region_"
    elif source == 'RGI':
        columns = ['GLIMSId', 'Area', 'Name', 'BgnDate']
        area_column = 'Area'
        csv_prefix = "data/rgi/processed/largest/rgi_region_"
    else:
        print("Incorrect source input")
        return

    # Find 10 largest (rows come back ordered from largest to smallest area)
    ten_largest_df = data[columns].nlargest(10, area_column)

    # Save to csv file if it doesn't already exist
    largest_csv_fp = csv_prefix + str(region_no) + "_largest.csv"
    if not os.path.exists(largest_csv_fp):
        # to_csv does not create missing folders, so make sure the target exists
        os.makedirs(os.path.dirname(largest_csv_fp), exist_ok=True)
        # Report the regions for which a new file is written
        print(region_no)
        ten_largest_df.to_csv(largest_csv_fp, index=False)

    return

## All 19 Regions

### GLIMS GLIMS GLIMS GLIMS
----------------------------------------------

In [3]:
# Write the GLIMS ten-largest csv file for every region, 1 through 19
region_no = list(range(1, 20))
for region in region_no:
    # Read this region's cleaned GLIMS outlines, then let ten_largest rank and save them
    glims_region_fp = "data/glims/processed/cleaned/glims_region_" + str(region) + "_cleaned.shp"
    glims_polygons = gpd.read_file(glims_region_fp)
    ten_largest(glims_polygons, region, "GLIMS")

### RGI RGI RGI RGI
------------------------------------------------

In [4]:
# Write the RGI ten-largest csv file for every region, 1 through 19
region_no = list(range(1, 20))
for region in region_no:
    # The RGI outlines are opened through the project helper rather than a path built here
    rgi_polygons = ws.open_rgi_region(region)
    ten_largest(rgi_polygons, region, "RGI")

01_rgi60_Alaska/01_rgi60_Alaska.shp
02_rgi60_WesternCanadaUS/02_rgi60_WesternCanadaUS.shp
03_rgi60_ArcticCanadaNorth/03_rgi60_ArcticCanadaNorth.shp
04_rgi60_ArcticCanadaSouth/04_rgi60_ArcticCanadaSouth.shp
05_rgi60_GreenlandPeriphery/05_rgi60_GreenlandPeriphery.shp
06_rgi60_Iceland/06_rgi60_Iceland.shp
07_rgi60_Svalbard/07_rgi60_Svalbard.shp
08_rgi60_Scandinavia/08_rgi60_Scandinavia.shp
09_rgi60_RussianArctic/09_rgi60_RussianArctic.shp
10_rgi60_NorthAsia/10_rgi60_NorthAsia.shp
11_rgi60_CentralEurope/11_rgi60_CentralEurope.shp
12_rgi60_CaucasusMiddleEast/12_rgi60_CaucasusMiddleEast.shp
13_rgi60_CentralAsia/13_rgi60_CentralAsia.shp
14_rgi60_SouthAsiaWest/14_rgi60_SouthAsiaWest.shp
15_rgi60_SouthAsiaEast/15_rgi60_SouthAsiaEast.shp
16_rgi60_LowLatitudes/16_rgi60_LowLatitudes.shp
17_rgi60_SouthernAndes/17_rgi60_SouthernAndes.shp
18_rgi60_NewZealand/18_rgi60_NewZealand.shp
18
19_rgi60_AntarcticSubantarctic/19_rgi60_AntarcticSubantarctic.shp
19


### Extra Code

### GLIMS

In [None]:
# Open GLIMS region 1 - Alaska
# Read the cleaned region 1 shapefile (path is relative to the working directory)
# and preview the first rows
glims_region_1_fp = "data/glims/processed/cleaned/glims_region_1_cleaned.shp"
glims_polygons_r1 = gpd.read_file(glims_region_1_fp)
glims_polygons_r1.head()

In [None]:
# Get ten largest
# Keep the glac_id, db_area, glac_name and src_date columns of the 10 rows
# with the largest db_area, ordered from largest to smallest
glims_ten_max = glims_polygons_r1[['glac_id', 'db_area', 'glac_name', 'src_date']].nlargest(10, 'db_area')
glims_ten_max

In [None]:
# test ten_largest for GLIMS
# Uses region 1; the csv file is only written if it does not already exist
ten_largest(glims_polygons_r1, 1, 'GLIMS')

In [None]:
# GLIMS dataframe shape as (number of rows, number of columns)
glims_polygons_r1.shape

In [None]:
# Find the maximum db_area for GLIMS
# Store the value and display it
max_area_glims = glims_polygons_r1['db_area'].max()
max_area_glims

In [None]:
# Get index of the record with the maximum db_area for GLIMS
# The index label is stored only; this cell displays nothing
glims_id_max = glims_polygons_r1['db_area'].idxmax()

In [None]:
# View the record with the max for GLIMS
# Look the record up by the index label that idxmax() returns for db_area
glims_polygons_r1.loc[glims_polygons_r1['db_area'].idxmax()]

In [None]:
# Get the max GLIMS in a dataframe
# Select the row by the index label idxmax() returns rather than a hard-coded
# row position, so the cell still picks the largest glacier if the data changes.
# Passing the label inside a list keeps the result a dataframe.
glims_max = glims_polygons_r1.loc[[glims_polygons_r1['db_area'].idxmax()]]
glims_max

### RGI

In [None]:
# Open RGI region 01 - Alaska
# Load region 1 through the project helper and preview the first rows
rgi_region01_polygons = ws.open_rgi_region(1)
rgi_region01_polygons.head()

In [None]:
# List ten biggest glaciers from RGI in region 1
# Keep the GLIMSId, Area, Name and BgnDate columns of the 10 rows with the
# largest Area, ordered from largest to smallest
rgi_ten_max = rgi_region01_polygons[['GLIMSId', 'Area', 'Name', 'BgnDate']].nlargest(10, 'Area')
rgi_ten_max

In [None]:
# test ten_largest for RGI
# Uses region 1; the csv file is only written if it does not already exist
ten_largest(rgi_region01_polygons, 1, 'RGI')

In [None]:
# RGI dataframe shape as (number of rows, number of columns)
rgi_region01_polygons.shape

In [None]:
# Get the maximum area for RGI
# Store the value and display it
max_area_rgi = rgi_region01_polygons['Area'].max()
max_area_rgi

In [None]:
# Get the index label of the record with the maximum area for RGI
# (idxmax() returns the label, not the record itself)
rgi_region01_polygons['Area'].idxmax()

In [None]:
# View the record with the max for RGI
# Look the record up by the index label that idxmax() returns for Area
rgi_region01_polygons.loc[rgi_region01_polygons['Area'].idxmax()]

In [None]:
# Get the max RGI in a dataframe
# Select the row by the index label idxmax() returns rather than a hard-coded
# row position, so the cell still picks the largest glacier if the data changes.
# Passing the label inside a list keeps the result a dataframe.
rgi_max = rgi_region01_polygons.loc[[rgi_region01_polygons['Area'].idxmax()]]
rgi_max