In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd

# WGMS

## 1. GLACIER
General (and presumably static) information about each glacier

In [3]:
glacier = pd.read_csv("data/wgms/glacier.csv")

glacier_nz = glacier[glacier["POLITICAL_UNIT"] == "NZ"]

glacier_nz_drop = glacier_nz.drop(["PARENT_GLACIER", "REMARKS", "GLACIER_REGION_CODE", "GLACIER_SUBREGION_CODE"], axis=1)
glacier_nz_drop.head()

  glacier = pd.read_csv("data/wgms/glacier.csv")


Unnamed: 0,POLITICAL_UNIT,NAME,WGMS_ID,GEN_LOCATION,SPEC_LOCATION,LATITUDE,LONGITUDE,PRIM_CLASSIFIC,FORM,FRONTAL_CHARS,EXPOS_ACC_AREA,EXPOS_ABL_AREA
158448,NZ,ABEL,1546,WHATAROA,PERTH,-43.32,170.630005,4.0,7.0,8.0,S,S
158449,NZ,ADAMS,2923,WANGANUI,ADAMS,-43.32,170.720001,5.0,1.0,8.0,W,N
158450,NZ,AILSA,2924,HUMBOLDTS,CAPLES,-44.7861,168.187,6.0,4.0,4.0,S,S
158451,NZ,ALMER/SALISBURY,1548,WAIHO,WAIHO,-43.470001,170.220001,5.0,1.0,8.0,W,SW
158452,NZ,ANDY,1590,OLIVINES,WILLIAMSON,-44.43,168.369995,4.0,1.0,8.0,N,N


In [4]:
glacier_nz_drop.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2869 entries, 158448 to 161316
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   POLITICAL_UNIT  2869 non-null   object 
 1   NAME            2869 non-null   object 
 2   WGMS_ID         2869 non-null   int64  
 3   GEN_LOCATION    119 non-null    object 
 4   SPEC_LOCATION   118 non-null    object 
 5   LATITUDE        2869 non-null   float64
 6   LONGITUDE       2869 non-null   float64
 7   PRIM_CLASSIFIC  117 non-null    float64
 8   FORM            118 non-null    float64
 9   FRONTAL_CHARS   118 non-null    float64
 10  EXPOS_ACC_AREA  118 non-null    object 
 11  EXPOS_ABL_AREA  118 non-null    object 
dtypes: float64(5), int64(1), object(6)
memory usage: 291.4+ KB


## 2. STATE
Glacier length, area, and elevation range

In [5]:
state = pd.read_csv("data/wgms/state.csv")

state_nz = state[state["POLITICAL_UNIT"] == "NZ"]

state_nz_drop = state_nz.drop(["NAME", "SURVEY_DATE", "ELEVATION_UNC", "LENGTH_UNC", "AREA_UNC", "SURVEY_PLATFORM_METHOD", "INVESTIGATOR", "SPONS_AGENCY", "REFERENCE", "REMARKS"], axis=1)
state_nz_drop.rename(columns = {'YEAR':'STATE_YEAR'}, inplace = True)

state_nz_drop.head()

Unnamed: 0,POLITICAL_UNIT,WGMS_ID,STATE_YEAR,HIGHEST_ELEVATION,MEDIAN_ELEVATION,LOWEST_ELEVATION,LENGTH,AREA
8947,NZ,1546,1978,2225.0,1980.0,1860.0,0.7,3.45
8948,NZ,2923,1978,2470.0,1880.0,1295.0,6.6,9.96
8949,NZ,2924,1978,1830.0,1640.0,1555.0,0.7,
8950,NZ,1548,1978,2390.0,1865.0,1340.0,2.98,3.1
8951,NZ,1590,1978,2190.0,1750.0,840.0,7.1,10.49


## 3. CHANGE
Change in glacier thickness, area, and/or volume â€“ typically from geodetic surveys



In [7]:
change = pd.read_csv("data/wgms/change.csv")

change_nz = change[change["POLITICAL_UNIT"] == "NZ"]

change_nz_drop = change_nz.drop(["POLITICAL_UNIT",
                                 "NAME",
                                 "SURVEY_DATE",
                                 "REFERENCE_DATE",
                                 "LOWER_BOUND",
                                 "UPPER_BOUND",
                                 "AREA_CHANGE",  # all NaN
                                 "AREA_CHANGE_UNC",
                                 "THICKNESS_CHG_UNC",
                                 "VOLUME_CHANGE_UNC",
                                 "SD_PLATFORM_METHOD",
                                 "RD_PLATFORM_METHOD",
                                 "INVESTIGATOR",
                                 "SPONS_AGENCY",
                                 "REFERENCE",
                                 "REMARKS"], axis=1)
change_nz_drop.rename(columns = {'YEAR':'CHANGE_YEAR'}, inplace = True)
change_nz_drop.head()

Unnamed: 0,WGMS_ID,SURVEY_ID,CHANGE_YEAR,AREA_SURVEY_YEAR,THICKNESS_CHG,VOLUME_CHANGE
814867,2923,21850,2013,4.279,5913.0,
814868,2923,21851,2018,4.279,-14433.0,
814869,2923,21852,2018,4.279,-13184.0,
814870,2923,21853,2014,4.279,-7845.0,
814871,2923,21854,2017,4.279,-7478.0,


## 4. FRONT_VARIATION
Glacier length changes from in-situ and remote sensing measurements.

In [9]:
front_variation = pd.read_csv("data/wgms/front_variation.csv")

front_variation_nz = front_variation[front_variation["POLITICAL_UNIT"] == "NZ"]

front_variation_nz_drop = front_variation_nz.drop(["FRONT_VAR_UNC", 
                                                   "SURVEY_PLATFORM_METHOD", 
                                                   "INVESTIGATOR", 
                                                   "SPONS_AGENCY", 
                                                   "REFERENCE", 
                                                   "REMARKS",
                                                   "POLITICAL_UNIT",
                                                   "NAME"], axis=1)
front_variation_nz_drop.rename(columns = {'YEAR':'FRONT_VARIATION_YEAR'}, inplace = True)
front_variation_nz_drop.head()

Unnamed: 0,WGMS_ID,FRONT_VARIATION_YEAR,SURVEY_DATE,REFERENCE_DATE,FRONT_VARIATION,QUALITATIVE_VARIATION
41300,1546,1993,19930215,19890401.0,,+X
41301,1546,1994,19940310,19930215.0,,ST
41302,1546,1995,19950304,19940310.0,,ST
41303,2923,1992,19920407,19870306.0,,-X
41304,2923,1993,19930215,19920407.0,,-X


## 5. MASS_BALANCE:
Glacier mass balance measurements by elevation band.

In [11]:
mass_balance = pd.read_csv("data/wgms/mass_balance.csv")

mass_balance_nz = mass_balance[mass_balance["POLITICAL_UNIT"] == "NZ"]

mass_balance_nz_drop = mass_balance_nz.drop(["LOWER_BOUND", 
                                             "UPPER_BOUND", 
                                             "WINTER_BALANCE_UNC", 
                                             "SUMMER_BALANCE_UNC", 
                                             "ANNUAL_BALANCE_UNC",
                                             "REMARKS",
                                             "POLITICAL_UNIT",
                                             "NAME"], axis=1)
mass_balance_nz_drop.rename(columns = {'YEAR':'MASS_BALANCE_YEAR', 'AREA':'MASS_BALANCE_YEAR_AREA'}, inplace = True)
mass_balance_nz_drop.head()

Unnamed: 0,WGMS_ID,MASS_BALANCE_YEAR,MASS_BALANCE_YEAR_AREA,WINTER_BALANCE,SUMMER_BALANCE,ANNUAL_BALANCE
44238,1597,2005,2.03,2875.0,-1499.0,1376.0
44239,1597,2006,2.03,2248.0,-1557.0,691.0
44240,1597,2007,2.03,3039.0,-2347.0,692.0
44241,1597,2008,2.03,2392.0,-4090.0,-1698.0
44242,1597,2009,2.03,1975.0,-2677.0,-702.0


## 6. SPECIAL_EVENT:
Extraordinary events concerning glacier hazards and dramatic glacier changes.

In [13]:
special_event = pd.read_csv("data/wgms/special_event.csv")

special_event_nz = special_event[special_event["POLITICAL_UNIT"] == "NZ"]

special_event_nz_drop = special_event_nz.drop(["INVESTIGATOR", 
                                               "SPONS_AGENCY", 
                                               "REFERENCE", 
                                               "REMARKS",
                                               "POLITICAL_UNIT",
                                               "NAME",
                                               "EVENT_ID"], axis=1)
special_event_nz_drop.head()

Unnamed: 0,WGMS_ID,EVENT_DATE,ET_SURGE,ET_CALVING,ET_FLOOD,ET_AVALANCHE,ET_TECTONIC,ET_OTHER,EVENT_DESCRIPTION
2906,1580,19920502.0,False,False,False,False,False,True,"The rock avalanche, inspected on 5 May 1992, r..."
2907,1580,19920916.0,False,False,False,False,False,True,"The rock avalanche, inspected on 20 September ..."
2908,1074,19911214.0,False,False,False,False,True,True,Mount Cook Rock Avalanche occurred on 14 Decem...
2909,1074,19949999.0,False,False,False,False,False,True,"During a storm of January 1994, the river brea..."


## 7. RECONSTRUCTION_FRONT_VARIATION:
Glacier length changes reconstructed from historic records and geologic dating.

In [14]:
reconstruction_front_variation = pd.read_csv("data/wgms/reconstruction_front_variation.csv")

reconstruction_front_variation_nz = reconstruction_front_variation[reconstruction_front_variation["POLITICAL_UNIT"] == "NZ"]

reconstruction_front_variation_nz_drop = reconstruction_front_variation_nz.drop(["YEAR_UNC", 
                                                                                 "REF_YEAR_UNC", 
                                                                                 "FRONT_VAR_POS_UNC", 
                                                                                 "FRONT_VAR_NEG_UNC",
                                                                                 "ELEVATION_UNC", 
                                                                                 "METHOD_CODE", 
                                                                                 "METHOD_REMARKS", 
                                                                                 "REMARKS", 
                                                                                 "QUALITATIVE_VARIATION",  # all NaN
                                                                                 "LOWEST_ELEVATION", 
                                                                                 "HIGHEST_ELEVATION",
                                                                                 "MORAINE_DEFINED_MAX",
                                                                                 "POLITICAL_UNIT",
                                                                                 "NAME"], axis=1)
reconstruction_front_variation_nz_drop.rename(columns = {'YEAR':'RECONSTRUCTION_FRONT_VARIATION_YEAR'}, inplace = True)
reconstruction_front_variation_nz_drop.head()

Unnamed: 0,WGMS_ID,REC_SERIES_ID,RECONSTRUCTION_FRONT_VARIATION_YEAR,REFERENCE_YEAR,FRONT_VARIATION
1874,899,36,1780,1600.0,-560.0
1875,899,36,1820,1780.0,141.0
1876,899,36,1865,1820.0,-240.0
1877,899,36,1867,1865.0,-21.0
1878,899,36,1886,1867.0,-29.0


## dataframes after previous steps
- glacier_nz_drop
- state_nz_drop
- change_nz_drop
- front_variation_nz_drop
- mass_balance_nz_drop
- special_event_nz_drop
- reconstruction_front_variation_nz_drop

In [15]:
glacier_nz_drop.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2869 entries, 158448 to 161316
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   POLITICAL_UNIT  2869 non-null   object 
 1   NAME            2869 non-null   object 
 2   WGMS_ID         2869 non-null   int64  
 3   GEN_LOCATION    119 non-null    object 
 4   SPEC_LOCATION   118 non-null    object 
 5   LATITUDE        2869 non-null   float64
 6   LONGITUDE       2869 non-null   float64
 7   PRIM_CLASSIFIC  117 non-null    float64
 8   FORM            118 non-null    float64
 9   FRONTAL_CHARS   118 non-null    float64
 10  EXPOS_ACC_AREA  118 non-null    object 
 11  EXPOS_ABL_AREA  118 non-null    object 
dtypes: float64(5), int64(1), object(6)
memory usage: 291.4+ KB


In [16]:
state_nz_drop.info()

<class 'pandas.core.frame.DataFrame'>
Index: 165 entries, 8947 to 9111
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   POLITICAL_UNIT     165 non-null    object 
 1   WGMS_ID            165 non-null    int64  
 2   STATE_YEAR         165 non-null    int64  
 3   HIGHEST_ELEVATION  141 non-null    float64
 4   MEDIAN_ELEVATION   121 non-null    float64
 5   LOWEST_ELEVATION   159 non-null    float64
 6   LENGTH             127 non-null    float64
 7   AREA               105 non-null    float64
dtypes: float64(5), int64(2), object(1)
memory usage: 11.6+ KB


In [17]:
change_nz_drop.info()

<class 'pandas.core.frame.DataFrame'>
Index: 45643 entries, 814867 to 860509
Data columns (total 6 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   WGMS_ID           45643 non-null  int64  
 1   SURVEY_ID         45643 non-null  int64  
 2   CHANGE_YEAR       45643 non-null  int64  
 3   AREA_SURVEY_YEAR  45643 non-null  float64
 4   THICKNESS_CHG     45643 non-null  float64
 5   VOLUME_CHANGE     13790 non-null  float64
dtypes: float64(3), int64(3)
memory usage: 2.4 MB


In [18]:
front_variation_nz_drop.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1164 entries, 41300 to 42463
Data columns (total 6 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   WGMS_ID                1164 non-null   int64  
 1   FRONT_VARIATION_YEAR   1164 non-null   int64  
 2   SURVEY_DATE            1164 non-null   int64  
 3   REFERENCE_DATE         1163 non-null   float64
 4   FRONT_VARIATION        244 non-null    float64
 5   QUALITATIVE_VARIATION  937 non-null    object 
dtypes: float64(2), int64(3), object(1)
memory usage: 63.7+ KB


In [19]:
mass_balance_nz_drop.info()

<class 'pandas.core.frame.DataFrame'>
Index: 62 entries, 44238 to 44299
Data columns (total 6 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   WGMS_ID                 62 non-null     int64  
 1   MASS_BALANCE_YEAR       62 non-null     int64  
 2   MASS_BALANCE_YEAR_AREA  60 non-null     float64
 3   WINTER_BALANCE          38 non-null     float64
 4   SUMMER_BALANCE          38 non-null     float64
 5   ANNUAL_BALANCE          62 non-null     float64
dtypes: float64(4), int64(2)
memory usage: 3.4 KB


In [20]:
special_event_nz_drop.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 2906 to 2909
Data columns (total 9 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   WGMS_ID            4 non-null      int64  
 1   EVENT_DATE         4 non-null      float64
 2   ET_SURGE           4 non-null      bool   
 3   ET_CALVING         4 non-null      bool   
 4   ET_FLOOD           4 non-null      bool   
 5   ET_AVALANCHE       4 non-null      bool   
 6   ET_TECTONIC        4 non-null      bool   
 7   ET_OTHER           4 non-null      bool   
 8   EVENT_DESCRIPTION  4 non-null      object 
dtypes: bool(6), float64(1), int64(1), object(1)
memory usage: 152.0+ bytes


In [21]:
reconstruction_front_variation_nz_drop.info()

<class 'pandas.core.frame.DataFrame'>
Index: 6 entries, 1874 to 1879
Data columns (total 5 columns):
 #   Column                               Non-Null Count  Dtype  
---  ------                               --------------  -----  
 0   WGMS_ID                              6 non-null      int64  
 1   REC_SERIES_ID                        6 non-null      int64  
 2   RECONSTRUCTION_FRONT_VARIATION_YEAR  6 non-null      int64  
 3   REFERENCE_YEAR                       6 non-null      float64
 4   FRONT_VARIATION                      6 non-null      float64
dtypes: float64(2), int64(3)
memory usage: 288.0 bytes


## Map NZ Glacier

In [23]:
glacier_nz_drop["WGMS_ID"].nunique()

2869

In [22]:
# merge nz glacier that has change record
nz_glacier_change = pd.merge(glacier_nz_drop, change_nz_drop, how="right", on="WGMS_ID")
nz_glacier_change.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45643 entries, 0 to 45642
Data columns (total 17 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   POLITICAL_UNIT    45643 non-null  object 
 1   NAME              45643 non-null  object 
 2   WGMS_ID           45643 non-null  int64  
 3   GEN_LOCATION      5755 non-null   object 
 4   SPEC_LOCATION     5748 non-null   object 
 5   LATITUDE          45643 non-null  float64
 6   LONGITUDE         45643 non-null  float64
 7   PRIM_CLASSIFIC    5473 non-null   float64
 8   FORM              5682 non-null   float64
 9   FRONTAL_CHARS     5682 non-null   float64
 10  EXPOS_ACC_AREA    5682 non-null   object 
 11  EXPOS_ABL_AREA    5682 non-null   object 
 12  SURVEY_ID         45643 non-null  int64  
 13  CHANGE_YEAR       45643 non-null  int64  
 14  AREA_SURVEY_YEAR  45643 non-null  float64
 15  THICKNESS_CHG     45643 non-null  float64
 16  VOLUME_CHANGE     13790 non-null  float6

In [26]:
nz_glacier_change_map = nz_glacier_change[["WGMS_ID", "NAME", "LATITUDE", "LONGITUDE"]]
nz_glacier_change_map.head()

Unnamed: 0,WGMS_ID,NAME,LATITUDE,LONGITUDE
0,2923,ADAMS,-43.32,170.720001
29,2924,AILSA,-44.7861,168.187
34,1548,ALMER/SALISBURY,-43.470001,170.220001
345,1590,ANDY,-44.43,168.369995
389,2283,AXIUS,-44.169998,168.979996


In [30]:
gdf_nz_glacier_change_map = (
    gpd.GeoDataFrame(
        nz_glacier_change_map, geometry=gpd.points_from_xy(nz_glacier_change_map.LONGITUDE, nz_glacier_change_map.LATITUDE), crs="EPSG:4326"
    )
)
gdf_nz_glacier_change_map.head()

Unnamed: 0,WGMS_ID,NAME,LATITUDE,LONGITUDE,geometry
0,2923,ADAMS,-43.32,170.720001,POINT (170.72000 -43.32000)
29,2924,AILSA,-44.7861,168.187,POINT (168.18700 -44.78610)
34,1548,ALMER/SALISBURY,-43.470001,170.220001,POINT (170.22000 -43.47000)
345,1590,ANDY,-44.43,168.369995,POINT (168.37000 -44.43000)
389,2283,AXIUS,-44.169998,168.979996,POINT (168.98000 -44.17000)


In [31]:
gdf_nz_glacier_change_map.explore()

In [33]:
nz_glacier_change["THICKNESS_CHG"].isna().sum()

0

In [34]:
nz_glacier_change["VOLUME_CHANGE"].isna().sum()

31853

In [40]:
nz_glacier_change["WGMS_ID"].nunique()

2816

In [39]:
nz_glacier_change_non_nan = nz_glacier_change[~nz_glacier_change['VOLUME_CHANGE'].isna()]
nz_glacier_change_non_nan["WGMS_ID"].nunique()

2758