# Summarize Results

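This notebook reads the finalized glacier and ice complex shapefiles for each region and writes the compiled results to two .csv files: `data/final-dataset/compiled-glacier-sizes.csv` and `data/final-dataset/compiled-complex-sizes.csv`.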

In [9]:
import glob
import os
import os.path as op
import sys
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import Polygon
import zipfile

# Run everything relative to the project checkout in the user's home directory
HOME = op.expanduser("~")
os.chdir(os.path.join(HOME, "git/wgms-glacier-project"))

# Region numbers (as strings) used to build the glacier file names: "1" .. "19"
region = [str(number) for number in range(1, 20)]
# Region numbers (as strings) used to build the ice catchment file names
catch_region = ['3', '4', '5', '6', '7', '8', '9', '10', '17', '19']

## Glaciers

In [10]:
# Extract every glacier zip archive into one common folder
unzip_dir = "data/final-dataset/unzipped"
for x in region:
    # Region 19 uses a different naming convention: it comes as two archives
    # (mainland first, then islands). All other regions have a single archive.
    if x == "19":
        archive_labels = ["-mainland", "-islands"]
    else:
        archive_labels = [""]

    for label in archive_labels:
        archive_fp = "data/final-dataset/region-" + x + label + "-largest-glaciers.zip"
        with zipfile.ZipFile(archive_fp, "r") as zip_ref:
            zip_ref.extractall(unzip_dir)
        


In [11]:
# Open the finalized glacier shapefiles and concatenate them into a single data frame.
# The per-region frames are collected in a list and combined with one pd.concat call:
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, and this
# form no longer depends on region '1' being the first entry of `region`.
glacier_region_parts = []
for x in region:
    # Region 19 is split into a mainland and an islands shapefile (read in that
    # order); every other region has a single shapefile
    if x == "19":
        name_labels = ["-mainland", "-islands"]
    else:
        name_labels = [""]

    for name_label in name_labels:
        glacier_shapefile_fn = ("data/final-dataset/unzipped/region-" + x + name_label
                                + "-largest-glaciers.shp")
        glacier_region_parts.append(gpd.read_file(glacier_shapefile_fn))

# Row labels from the individual files are kept (no ignore_index), which matches
# what the former append-based code produced
glacier_regions = pd.concat(glacier_region_parts)

In [12]:
# Write the glacier data frame to csv.
# An existing csv file is never overwritten; delete it first to regenerate it.
csv_glacier_fp = "data/final-dataset/compiled-glacier-sizes.csv"
if not os.path.exists(csv_glacier_fp):
    # Only the summary attribute columns are exported (no geometry column)
    glacier_regions.to_csv(csv_glacier_fp, encoding='utf-8-sig',
                           index=False, columns=['region_no', 'reg_name', 'glac_name',
                                                 'glims_id', 'primeclass', 'area_km2', 'date'])
    print("Creating csv file: " + csv_glacier_fp)
else:
    print(csv_glacier_fp + " already exists")

data/final-dataset/compiled-glacier-sizes.csv already extists


## Ice Catchments

In [13]:
# Extract the ice catchment zip archive of every catchment region
for x in catch_region:
    complexes_zip_fp = "data/final-dataset/region-{}-largest-complexes.zip".format(x)
    with zipfile.ZipFile(complexes_zip_fp, "r") as zip_ref:
        zip_ref.extractall("data/final-dataset/unzipped")

# NOTE: Region 7 is no longer split up, so its separate archives
# (region-7-jan-mayen-largest-ice-caps.zip and region-7-svalbard-largest-ice-caps.zip)
# are not unzipped here anymore. A similar special case may be added back for
# region 19. TBD

In [14]:
# Open the finalized ice catchment shapefiles and concatenate them into a single data frame.
# The per-region frames are collected in a list and combined with one pd.concat call:
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, and this
# form no longer depends on region "3" being the first entry of `catch_region`.
ic_region_parts = []
for x in catch_region:
    ic_shapefile_fn = "data/final-dataset/unzipped/region-" + x + "-largest-complexes.shp"
    ic_region_parts.append(gpd.read_file(ic_shapefile_fn))

# NOTE: Region 7 is no longer split up, so its separate shapefiles
# (region-7-jan-mayen-largest-ice-caps.shp and region-7-svalbard-largest-ice-caps.shp)
# are not read here anymore. A similar special case may be added back for
# region 19. TBD

# Row labels from the individual files are kept (no ignore_index), which matches
# what the former append-based code produced
ic_regions = pd.concat(ic_region_parts)

In [15]:
# Write the ice complex data frame to csv.
# An existing csv file is never overwritten; delete it first to regenerate it.
csv_catchment_fp = "data/final-dataset/compiled-complex-sizes.csv"
if not os.path.exists(csv_catchment_fp):
    # Only the summary attribute columns are exported (no geometry column)
    ic_regions.to_csv(csv_catchment_fp, encoding='utf-8-sig', index=False,
                      columns=['region_no', 'reg_name', 'ic_name',
                               'primeclass', 'area_km2', 'min_date',
                               'max_date'])
    print("Creating csv file: " + csv_catchment_fp)
else:
    print(csv_catchment_fp + " already exists")

data/final-dataset/compiled-complex-sizes.csv already extists


## Clean up

In [16]:
# Delete everything that was extracted, to free up disk space.
# NOTE: os.remove only deletes files; it raises if an entry is a directory.
for unzipped_fp in glob.glob("data/final-dataset/unzipped/*"):
    os.remove(unzipped_fp)