# Compare GLIMS and RGI Data
Author: Ann Windnagel

Date: 3/10/19

This notebook compares GLIMS and RGI data to determine the 10 largest glaciers in each of the 19 world glacier regions. The results are saved to CSV files, one per region for each of GLIMS and RGI, for a total of 38 output files.

Using those CSV files, the 5 largest glaciers in each region are then selected from GLIMS and from RGI, and each selection is saved to a shapefile for that region.

## Import packages

In [1]:
import os
import os.path as op
import sys
import pandas as pd
import matplotlib.pyplot as plt
import geopandas as gpd
from shapely.geometry import Polygon

# Work from the project checkout under the user's home directory so the
# relative "data/..." paths used in later cells resolve
HOME = op.expanduser("~")
project_dir = op.join(HOME, "git/wgms-glacier-project")
os.chdir(project_dir)

# Make the parent of the working directory importable (added only once)
module_path = op.abspath('..')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)
    
import scripts.wgms_scripts as ws

In [2]:
# Region numbers 1 through 19 (inclusive), in ascending order
region_no = list(range(1, 20))

### GLIMS GLIMS GLIMS GLIMS
----------------------------------------------

In [3]:
# For every region, read the cleaned GLIMS outlines and hand them to
# ws.ten_largest, which is used here to produce the per-region GLIMS csv files
for region in region_no:
    glims_region_fp = "".join([
        "data/glims/processed/cleaned/glims_region_",
        str(region),
        "_cleaned.shp",
    ])
    glims_polygons = gpd.read_file(glims_region_fp)
    ws.ten_largest(glims_polygons, region, "GLIMS")

GLIMS Region 1 largest 10 CSV file already exists
GLIMS Region 2 largest 10 CSV file already exists
GLIMS Region 3 largest 10 CSV file already exists
GLIMS Region 4 largest 10 CSV file already exists
GLIMS Region 5 largest 10 CSV file already exists
GLIMS Region 6 largest 10 CSV file already exists
GLIMS Region 7 largest 10 CSV file already exists
GLIMS Region 8 largest 10 CSV file already exists
GLIMS Region 9 largest 10 CSV file already exists
GLIMS Region 10 largest 10 CSV file already exists
GLIMS Region 11 largest 10 CSV file already exists
GLIMS Region 12 largest 10 CSV file already exists
13
GLIMS Region 14 largest 10 CSV file already exists
GLIMS Region 15 largest 10 CSV file already exists
GLIMS Region 16 largest 10 CSV file already exists
GLIMS Region 17 largest 10 CSV file already exists
GLIMS Region 18 largest 10 CSV file already exists
GLIMS Region 19 largest 10 CSV file already exists


### RGI RGI RGI RGI
------------------------------------------------

In [4]:
# Use the ten_largest function to create the 19 csv files for RGI
for region in region_no:
    # RGI outlines are loaded through the project helper rather than from a
    # file path built in this cell
    rgi_polygons = ws.open_rgi_region(region)
    ws.ten_largest(rgi_polygons, region, "RGI")

RGI Region 1 largest 10 CSV file already exists
RGI Region 2 largest 10 CSV file already exists
RGI Region 3 largest 10 CSV file already exists
RGI Region 4 largest 10 CSV file already exists
RGI Region 5 largest 10 CSV file already exists
RGI Region 6 largest 10 CSV file already exists
RGI Region 7 largest 10 CSV file already exists
RGI Region 8 largest 10 CSV file already exists
RGI Region 9 largest 10 CSV file already exists
RGI Region 10 largest 10 CSV file already exists
RGI Region 11 largest 10 CSV file already exists
RGI Region 12 largest 10 CSV file already exists
RGI Region 13 largest 10 CSV file already exists
RGI Region 14 largest 10 CSV file already exists
RGI Region 15 largest 10 CSV file already exists
RGI Region 16 largest 10 CSV file already exists
RGI Region 17 largest 10 CSV file already exists
RGI Region 18 largest 10 CSV file already exists
RGI Region 19 largest 10 CSV file already exists


### Select 5 largest glaciers

### GLIMS GLIMS GLIMS GLIMS

In [5]:
for region in region_no:
    # Open cleaned GLIMS shapefile for the current region
    glims_region_fp = "data/glims/processed/cleaned/glims_region_" + str(region) + "_cleaned.shp"
    glims_polygons = gpd.read_file(glims_region_fp)

    # Open GLIMS csv file with 10 largest glaciers
    # NOTE(review): do_print is passed the string 'false', not the boolean
    # False -- confirm this is what ws.print_10_largest_glims expects.
    glims_largest_csv = ws.print_10_largest_glims(region, do_print='false')

    # IDs of the first five rows of the csv table, taken by position (.iloc)
    # so the selection does not depend on the frame having a default 0..n-1
    # index. Presumably the rows are ordered largest first -- verify against
    # ws.ten_largest.
    glims_top5_ids = glims_largest_csv['glac_id'].iloc[:5]

    # One single-glacier subset of the regional outlines per ID. The unpacking
    # raises ValueError if the table holds fewer than five rows.
    (glims_largest_pd_1, glims_largest_pd_2, glims_largest_pd_3,
     glims_largest_pd_4, glims_largest_pd_5) = [
        glims_polygons[glims_polygons['glac_id'] == glac_id]
        for glac_id in glims_top5_ids
    ]

    # Save 5 largest from GLIMS for current region to shapefile
    ws.save_5_largest(glims_largest_pd_1, glims_largest_pd_2, glims_largest_pd_3,
                      glims_largest_pd_4, glims_largest_pd_5, region, 'GLIMS')

data/glims/processed/largest/glims_region_1_largest.shp file already exists
data/glims/processed/largest/glims_region_2_largest.shp file already exists
data/glims/processed/largest/glims_region_3_largest.shp file already exists
data/glims/processed/largest/glims_region_4_largest.shp file already exists
data/glims/processed/largest/glims_region_5_largest.shp file already exists
data/glims/processed/largest/glims_region_6_largest.shp file already exists
data/glims/processed/largest/glims_region_7_largest.shp file already exists
data/glims/processed/largest/glims_region_8_largest.shp file already exists
data/glims/processed/largest/glims_region_9_largest.shp file already exists
data/glims/processed/largest/glims_region_10_largest.shp file already exists
data/glims/processed/largest/glims_region_11_largest.shp file already exists
data/glims/processed/largest/glims_region_12_largest.shp file already exists
Creating file data/glims/processed/largest/glims_region_13_largest.shp
data/glims/pro

### RGI RGI RGI RGI

In [6]:
for region in region_no:
    # Open RGI regional outlines through the project helper
    rgi_polygons = ws.open_rgi_region(region)

    # Open RGI Region csv file with 10 largest glaciers
    # NOTE(review): do_print is passed the string 'false', not the boolean
    # False -- confirm this is what ws.print_10_largest_rgi expects.
    rgi_largest = ws.print_10_largest_rgi(region, do_print='false')

    # IDs of the first five rows of the csv table, taken by position (.iloc)
    # so the selection does not depend on the frame having a default 0..n-1
    # index. Presumably the rows are ordered largest first -- verify against
    # ws.ten_largest.
    rgi_top5_ids = rgi_largest['GLIMSId'].iloc[:5]

    # One subset of the regional outlines per ID. The unpacking raises
    # ValueError if the table holds fewer than five rows.
    (rgi_largest_pd_1, rgi_largest_pd_2, rgi_largest_pd_3,
     rgi_largest_pd_4, rgi_largest_pd_5) = [
        rgi_polygons[rgi_polygons['GLIMSId'] == glims_id]
        for glims_id in rgi_top5_ids
    ]

    # Save 5 largest from RGI for current region to shapefile
    ws.save_5_largest(rgi_largest_pd_1, rgi_largest_pd_2, rgi_largest_pd_3,
                      rgi_largest_pd_4, rgi_largest_pd_5, region, 'RGI')

data/rgi/processed/largest/rgi_region_1_largest.shp file already exists
data/rgi/processed/largest/rgi_region_2_largest.shp file already exists
data/rgi/processed/largest/rgi_region_3_largest.shp file already exists
data/rgi/processed/largest/rgi_region_4_largest.shp file already exists
data/rgi/processed/largest/rgi_region_5_largest.shp file already exists
data/rgi/processed/largest/rgi_region_6_largest.shp file already exists
data/rgi/processed/largest/rgi_region_7_largest.shp file already exists
data/rgi/processed/largest/rgi_region_8_largest.shp file already exists
data/rgi/processed/largest/rgi_region_9_largest.shp file already exists
data/rgi/processed/largest/rgi_region_10_largest.shp file already exists
data/rgi/processed/largest/rgi_region_11_largest.shp file already exists
data/rgi/processed/largest/rgi_region_12_largest.shp file already exists
data/rgi/processed/largest/rgi_region_13_largest.shp file already exists
data/rgi/processed/largest/rgi_region_14_largest.shp file al

#### RGI Jan Mayen Region 7

In [7]:
# For RGI want to also save the 3 largest glaciers in Jan Mayen (Region 7)
# Only need to save 3 largest for this area (not 5)
# The largest glaciers in Region 7 are in Svalbard but want to do a separate analysis of Jan Mayen

# Open RGI Region 7 glacier files
rgi_polygons = ws.open_rgi_region(7)

# Create a clipping polygon for Jan Mayen for selecting the Jan Mayen glaciers
# Corner coordinates are (x, y) pairs; the values look like longitude/latitude
# in decimal degrees, and the ring is closed by repeating the first corner
jan_mayen_points = Polygon([(-9.5691, 71.5205), (-7.2620, 71.5205),
                                 (-7.2620, 70.5136), (-9.5691, 70.5136), 
                                 (-9.5691, 71.5205)])

# Create dataframe that holds the clipping box
# The box is tagged as EPSG:4326 (WGS 84 lon/lat), which matches the degree
# coordinates above; the previous code 4362 was a digit transposition.
# The {'init': ...} dict form is kept as-is for the geopandas version this
# notebook was written against.
# NOTE(review): assumes the RGI outlines are also in EPSG:4326 -- confirm
# against ws.open_rgi_region / ws.pip.
jan_mayen_gdf = gpd.GeoDataFrame([1],
                                 geometry=[jan_mayen_points],
                                 crs={'init': 'epsg:4326'})

# Find the RGI outlines that lie within the jan mayen outline
jan_mayen_pip_mask = ws.pip(rgi_polygons, jan_mayen_gdf)

# Pass pip_mask into data to get the ones that are in the specified region
jan_mayen_region = rgi_polygons.loc[jan_mayen_pip_mask]

# Select the 3 largest jan mayen glaciers (by the Area column), keeping the
# geometry so the result can be written as a shapefile
jan_mayen_largest_df = jan_mayen_region[['RGIId', 'GLIMSId', 'Area', 'Name', 'BgnDate', 'geometry']].nlargest(3, 'Area')

# Save 3 largest from specified region to shapefile, unless it already exists
largest_jm_3_fp = "data/rgi/processed/largest/rgi_region_7_jan_mayen_largest.shp"
if not os.path.exists(largest_jm_3_fp):
    print("Creating file " + largest_jm_3_fp)
    jan_mayen_largest_df.to_file(driver='ESRI Shapefile', filename=largest_jm_3_fp)
else:
    print(largest_jm_3_fp + " already exists")

data/rgi/processed/largest/rgi_region_7_jan_mayen_largest.shp already exists


In [8]:
# Ten largest Jan Mayen glaciers by Area, attribute columns only (no geometry)
jm_csv_columns = ['GLIMSId', 'Area', 'Name', 'BgnDate']
ten_jm_largest_df = jan_mayen_region[jm_csv_columns].nlargest(10, 'Area')

# Write the csv file only when it is not already on disk
rgi_jm_largest_csv_fp = "data/rgi/processed/largest/rgi_region_7_jan_mayen_largest.csv"
if os.path.exists(rgi_jm_largest_csv_fp):
    print(rgi_jm_largest_csv_fp + " already exists")
else:
    print("Creating file " + rgi_jm_largest_csv_fp)
    ten_jm_largest_df.to_csv(rgi_jm_largest_csv_fp, index=False)

data/rgi/processed/largest/rgi_region_7_jan_mayen_largest.csv already exists
