# Summarize Results

This notebook reads the finalized glacier and ice complex shapefiles for each region and writes the compiled results to two .csv files (one for glaciers, one for ice complexes).

In [1]:
import glob
import os
import os.path as op
import sys
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import Polygon
import zipfile

# Move into the project checkout so the relative "data/..." paths used below resolve
HOME = op.expanduser("~")
os.chdir(op.join(HOME, "git/wgms-glacier-project"))

# Glacier and ice catchment region numbers 1 through 19, kept as strings
# because they are spliced directly into file names
region = [str(region_no) for region_no in range(1, 20)]
#catch_region = ['3', '4', '5', '6', '7', '8', '9', '10', '17', '19']

## Glaciers

In [2]:
# Extract every regional glacier archive into the shared "unzipped" folder
unzip_dir = "data/final-dataset/unzipped"
for region_no in region:
    # Region 19 uses a different naming convention: it has separate mainland
    # and islands archives, while every other region has a single archive
    if region_no == "19":
        archive_suffixes = ["-mainland-largest-glaciers.zip", "-islands-largest-glaciers.zip"]
    else:
        archive_suffixes = ["-largest-glaciers.zip"]

    for archive_suffix in archive_suffixes:
        archive_fp = "data/final-dataset/region-" + region_no + archive_suffix
        with zipfile.ZipFile(archive_fp, "r") as archive:
            archive.extractall(unzip_dir)
        


In [3]:
# Open the finalized glacier shapefiles and concatenate them into a single data frame.
# The shapefile names are collected first, in region order, and read in one pass.
glacier_shapefile_fns = []
for x in region:
    if x == "19":
        # Region 19 is split into a mainland and an islands shapefile
        glacier_shapefile_fns.append("data/final-dataset/unzipped/region-" + x + "-mainland-largest-glaciers.shp")
        glacier_shapefile_fns.append("data/final-dataset/unzipped/region-" + x + "-islands-largest-glaciers.shp")
    else:
        # Every other region has a single shapefile
        glacier_shapefile_fns.append("data/final-dataset/unzipped/region-" + x + "-largest-glaciers.shp")

# DataFrame.append was removed in pandas 2.0, so the parts are combined with a
# single pd.concat. This also no longer requires region '1' to be first in the
# list. ignore_index is left at its default so each part keeps its own row
# index, exactly as the previous append-based loop did.
glacier_regions = pd.concat([gpd.read_file(fn) for fn in glacier_shapefile_fns])

In [4]:
# Export the combined glacier table to csv; an existing file is never overwritten
csv_glacier_fp = "data/final-dataset/compiled-glacier-sizes.csv"
glacier_csv_columns = ['region_no', 'reg_name', 'glac_name',
                       'glims_id', 'primeclass', 'area_km2', 'date']

if os.path.exists(csv_glacier_fp):
    print(csv_glacier_fp + " already extists")
else:
    # Only the attribute columns are exported; the geometry column is left out
    glacier_regions.to_csv(csv_glacier_fp, encoding='utf-8-sig',
                           index=False, columns=glacier_csv_columns)
    print("Creating csv file: " + csv_glacier_fp)

data/final-dataset/compiled-glacier-sizes.csv already extists


## Ice Catchments

In [5]:
# Unzip the ice catchment (ice complex) shapefiles.
# Each archive is extracted exactly once. Previously a trailing extract at loop
# level re-extracted the clipped archive for regions 5, 7, 9 and 19.
for x in region:
    if x == "19":
        # Special case for Region 19 - Antarctica: islands archives only,
        # in an unclipped and a clipped version
        ic_zip_suffixes = ["-islands-largest-complexes", "-islands-largest-complexes-clipped"]
    elif x in ("5", "7", "9"):
        # These regions ship both an unclipped and a clipped archive
        ic_zip_suffixes = ["-largest-complexes", "-largest-complexes-clipped"]
    else:
        ic_zip_suffixes = ["-largest-complexes"]

    for ic_zip_suffix in ic_zip_suffixes:
        ic_zipfile_fn = "data/final-dataset/region-" + x + ic_zip_suffix + ".zip"
        with zipfile.ZipFile(ic_zipfile_fn, "r") as zip_ref:
            zip_ref.extractall("data/final-dataset/unzipped")

In [6]:
# Open the finalized ice catchment shapefiles and concatenate them into a single data frame.
# Every row gets a 'clipped' flag: 0 for rows read from the regular shapefile,
# 1 for rows read from the "-clipped" shapefile.
ic_region_parts = []
for x in region:
    if x == "19":
        # Region 19 only has islands shapefiles, in unclipped and clipped versions
        ic_variants = [("-islands-largest-complexes", 0),
                       ("-islands-largest-complexes-clipped", 1)]
    elif x in ("5", "7", "9"):
        # Regions 5, 7 and 9 have both an unclipped and a clipped shapefile
        ic_variants = [("-largest-complexes", 0),
                       ("-largest-complexes-clipped", 1)]
    else:
        # All other regions have a single, unclipped shapefile
        ic_variants = [("-largest-complexes", 0)]

    for ic_suffix, clipped_flag in ic_variants:
        ic_shapefile_fn = "data/final-dataset/unzipped/region-" + x + ic_suffix + ".shp"
        ic_regions_part = gpd.read_file(ic_shapefile_fn)
        ic_regions_part['clipped'] = clipped_flag
        if x == "19":
            # Drop the rgi_ids column from the Region 19 files so they are
            # combined on the same columns as the other regions
            ic_regions_part = ic_regions_part.drop(['rgi_ids'], axis=1)
        ic_region_parts.append(ic_regions_part)

# DataFrame.append was removed in pandas 2.0, so the parts are combined with a
# single pd.concat. This also no longer requires region '1' to be first in the
# list. ignore_index is left at its default so each part keeps its own row
# index, exactly as the previous append-based loop did.
ic_regions = pd.concat(ic_region_parts)

In [7]:
# TODO (TBD): add rgi_ids back in for Region 19; this column will be N/A for all other regions
# Preview a positional slice of rows (25 up to, not including, 38) of the combined table
ic_regions.iloc[25:38]

Unnamed: 0,region_no,reg_name,ic_name,primeclass,area_km2,min_date,max_date,glims_ids,geometry,clipped
1,7,Svalbard and Jan Mayen,Vestfonna Ice Cap,3,2372.805757,2008-08-14,2008-08-14,"G020188E80148N, G021814E80001N, G020521E80202N...","POLYGON ((19.215861 79.764661, 19.215788 79.76...",1
2,7,Svalbard and Jan Mayen,Asgardfonna Ice Cap,3,1586.593843,2008-06-07,2008-06-07,"G016639E79309N, G016493E79791N, G017153E79795N...",(POLYGON ((16.75903047879151 79.22311940240132...,1
0,8,Scandinavia,Jostedalsbreen Icefield,2,533.132714,2006-09-16,2006-09-16,"G006998E61646N, G006712E61508N, G006785E61632N...","POLYGON ((6.8937 61.517777, 6.893516 61.517774...",0
1,8,Scandinavia,Western Svartisen Icefield,2,225.507169,1999-09-07,1999-09-07,"G014011E66745N, G013784E66582N, G013736E66590N...","POLYGON ((13.838489 66.549521, 13.837876 66.54...",0
2,8,Scandinavia,Southern Folgefonna Icefield,2,176.098646,2002-09-13,2002-09-13,"G006261E60022N, G006363E60053N, G006284E60028N...","POLYGON ((6.321743 59.938767, 6.321196 59.9387...",0
0,9,Russian Arctic,Severny Island Northern Ice Cap,2,21072.123177,2004-07-19,2015-08-05,"G067156E76738N, G065077E76240N, G056338E74583N...","POLYGON ((56.918251 74.913927, 56.916186 74.91...",0
1,9,Russian Arctic,Academy of Sciences Ice Cap,3,5583.439404,2006-07-13,2006-07-13,"G095561E80299N, G094143E80332N, G095272E80187N...","POLYGON ((94.513183 80.150367, 94.504973000000...",0
2,9,Russian Arctic,Karpinsky/University Glacier Complex,2,4054.455204,2001-06-21,2001-06-21,"G099320E79182N, G099434E78915N, G097944E79558N...","POLYGON ((99.511706 78.849071, 99.507997 78.84...",0
0,9,Russian Arctic,Severny Island Northern Ice Cap,3,21072.123177,2004-07-19,2015-08-05,"G067156E76738N, G065077E76240N, G056338E74583N...","POLYGON ((56.918251 74.913927, 56.916186 74.91...",1
1,9,Russian Arctic,Academy of Sciences Ice Cap,3,5583.439404,2006-07-13,2006-07-13,"G095561E80299N, G094143E80332N, G095272E80187N...","POLYGON ((94.513183 80.150367, 94.504973000000...",1


In [8]:
# Export the combined ice complex table to csv; an existing file is never overwritten
csv_catchment_fp = "data/final-dataset/compiled-complex-sizes.csv"
ic_csv_columns = ['region_no', 'reg_name', 'ic_name', 'primeclass',
                  'area_km2', 'min_date', 'max_date', 'glims_ids',
                  'clipped']

if os.path.exists(csv_catchment_fp):
    print(csv_catchment_fp + " already extists")
else:
    # Only the attribute columns are exported; the geometry column is left out
    ic_regions.to_csv(csv_catchment_fp, encoding='utf-8-sig', index=False,
                      columns=ic_csv_columns)
    print("Creating csv file: " + csv_catchment_fp)

Creating csv file: data/final-dataset/compiled-complex-sizes.csv


## Clean up

In [9]:
# Delete every entry matched inside the unzipped folder to save disk space
for unzipped_fp in glob.glob("data/final-dataset/unzipped/*"):
    os.remove(unzipped_fp)