In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd

# WGMS

## 1. GLACIER
General (and presumably static) information about each glacier

In [2]:
glacier = pd.read_csv("data/wgms/glacier.csv")

glacier_nz = glacier[glacier["POLITICAL_UNIT"] == "NZ"]

glacier_nz_drop = glacier_nz.drop(["POLITICAL_UNIT", "REMARKS", "GLACIER_REGION_CODE", "GLACIER_SUBREGION_CODE",
                                  "GEN_LOCATION", "SPEC_LOCATION", "PARENT_GLACIER"], axis=1)
glacier_nz_drop.head()

  glacier = pd.read_csv("data/wgms/glacier.csv")


Unnamed: 0,NAME,WGMS_ID,LATITUDE,LONGITUDE,PRIM_CLASSIFIC,FORM,FRONTAL_CHARS,EXPOS_ACC_AREA,EXPOS_ABL_AREA
158448,ABEL,1546,-43.32,170.630005,4.0,7.0,8.0,S,S
158449,ADAMS,2923,-43.32,170.720001,5.0,1.0,8.0,W,N
158450,AILSA,2924,-44.7861,168.187,6.0,4.0,4.0,S,S
158451,ALMER/SALISBURY,1548,-43.470001,170.220001,5.0,1.0,8.0,W,SW
158452,ANDY,1590,-44.43,168.369995,4.0,1.0,8.0,N,N


In [3]:
glacier_nz_drop.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2869 entries, 158448 to 161316
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   NAME            2869 non-null   object 
 1   WGMS_ID         2869 non-null   int64  
 2   LATITUDE        2869 non-null   float64
 3   LONGITUDE       2869 non-null   float64
 4   PRIM_CLASSIFIC  117 non-null    float64
 5   FORM            118 non-null    float64
 6   FRONTAL_CHARS   118 non-null    float64
 7   EXPOS_ACC_AREA  118 non-null    object 
 8   EXPOS_ABL_AREA  118 non-null    object 
dtypes: float64(5), int64(1), object(3)
memory usage: 224.1+ KB


## 2. STATE
Glacier length, area, and elevation range

In [4]:
state = pd.read_csv("data/wgms/state.csv")

state_nz = state[state["POLITICAL_UNIT"] == "NZ"]

state_nz_drop = state_nz.drop(["NAME", "SURVEY_DATE", "ELEVATION_UNC", 
                               "LENGTH_UNC", "AREA_UNC", "SURVEY_PLATFORM_METHOD", 
                               "INVESTIGATOR", "SPONS_AGENCY", "REFERENCE", 
                               "REMARKS", "POLITICAL_UNIT"], axis=1)
state_nz_drop.rename(columns = {'YEAR':'STATE_YEAR'}, inplace = True)

state_nz_drop.head()

Unnamed: 0,WGMS_ID,STATE_YEAR,HIGHEST_ELEVATION,MEDIAN_ELEVATION,LOWEST_ELEVATION,LENGTH,AREA
8947,1546,1978,2225.0,1980.0,1860.0,0.7,3.45
8948,2923,1978,2470.0,1880.0,1295.0,6.6,9.96
8949,2924,1978,1830.0,1640.0,1555.0,0.7,
8950,1548,1978,2390.0,1865.0,1340.0,2.98,3.1
8951,1590,1978,2190.0,1750.0,840.0,7.1,10.49


In [5]:
state_nz_drop["STATE_YEAR"].unique()

array([1978, 1995, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013,
       2014, 2015, 2016, 1966, 1981, 1982, 1983, 1984, 1985, 1986, 1894,
       1867, 1965, 1975], dtype=int64)

## 3. CHANGE
Change in glacier thickness, area, and/or volume – typically from geodetic surveys



In [6]:
change = pd.read_csv("data/wgms/change.csv")

change_nz = change[change["POLITICAL_UNIT"] == "NZ"]

change_nz_drop = change_nz.drop(["POLITICAL_UNIT",
                                 "NAME",
                                 "SURVEY_ID",
                                 # "SURVEY_DATE",
                                 "REFERENCE_DATE",
                                 "LOWER_BOUND",
                                 "UPPER_BOUND",
                                 "AREA_CHANGE",  # all NaN
                                 "AREA_CHANGE_UNC",
                                 "THICKNESS_CHG_UNC",
                                 "VOLUME_CHANGE_UNC",
                                 "SD_PLATFORM_METHOD",
                                 "RD_PLATFORM_METHOD",
                                 "INVESTIGATOR",
                                 "SPONS_AGENCY",
                                 "REFERENCE",
                                 "REMARKS"], axis=1)
change_nz_drop.rename(columns = {'YEAR':'CHANGE_YEAR'}, inplace = True)
change_nz_drop.head()

Unnamed: 0,WGMS_ID,CHANGE_YEAR,SURVEY_DATE,AREA_SURVEY_YEAR,THICKNESS_CHG,VOLUME_CHANGE
814867,2923,2013,20130228,4.279,5913.0,
814868,2923,2018,20180307,4.279,-14433.0,
814869,2923,2018,20180307,4.279,-13184.0,
814870,2923,2014,20140224,4.279,-7845.0,
814871,2923,2017,20170304,4.279,-7478.0,


In [7]:
change_nz_drop["CHANGE_YEAR"].value_counts()

CHANGE_YEAR
2017    6998
2015    6584
2014    6162
2016    5848
2019    5516
2012    3764
2013    3107
2009    2853
2004    2758
2018     940
2011     720
2010     393
Name: count, dtype: int64

In [8]:
change_nz_drop[change_nz_drop['CHANGE_YEAR'] == 2009]["WGMS_ID"].nunique()

2767

In [9]:
change_nz_drop[change_nz_drop['CHANGE_YEAR'] == 2019]["WGMS_ID"].nunique()

2758

## ID refer table

In [152]:
id_refer = pd.read_csv("data/wgms/glacier_id_lut.csv")
id_refer_nz = id_refer[id_refer["POLITICAL_UNIT"] == 'NZ'][["WGMS_ID", "GLIMS_ID"]]
id_refer_nz

Unnamed: 0,WGMS_ID,GLIMS_ID
157808,1546,
157809,2923,G170734E43324S
157810,2924,
157811,1548,
157812,1590,G168373E44434S
...,...,...
160672,3037,G171385E43000S
160673,1609,
160674,1610,G170338E43417S
160675,1615,


In [153]:
id_refer_nz["WGMS_ID"].nunique()

2869

In [155]:
id_refer_nz["GLIMS_ID"].value_counts()

GLIMS_ID
G170734E43324S    1
G168578E44439S    1
G168482E44469S    1
G170802E43324S    1
G168242E44591S    1
G170913E43237S    1
G169947E43804S    1
G171526E42890S    1
G170953E43219S    1
G170579E43474S    1
G168515E44456S    1
G168373E44434S    1
G170890E43276S    1
G168764E44358S    1
G169595E44165S    1
G168421E44368S    1
G170521E43401S    1
G168556E44463S    1
G171385E43000S    1
G170338E43417S    1
G170603E43489S    1
G169130E44001S    1
G170894E43224S    1
G169959E43779S    1
G168980E44167S    1
G170637E43308S    1
G168354E44425S    1
G168713E44394S    1
G170887E43232S    1
G168609E44455S    1
G169769E43801S    1
G168869E44239S    1
G168025E44582S    1
G170925E43205S    1
G170562E43467S    1
G169884E43993S    1
G170591E43444S    1
G170492E43447S    1
G169791E43889S    1
G170806E43177S    1
G170761E43300S    1
G169828E43744S    1
Name: count, dtype: int64

# Other dataset

## 4. FRONT_VARIATION
Glacier length changes from in-situ and remote sensing measurements.

In [32]:
front_variation = pd.read_csv("data/wgms/front_variation.csv")

front_variation_nz = front_variation[front_variation["POLITICAL_UNIT"] == "NZ"]

front_variation_nz_drop = front_variation_nz.drop(["FRONT_VAR_UNC", 
                                                   "SURVEY_PLATFORM_METHOD", 
                                                   "INVESTIGATOR", 
                                                   "SPONS_AGENCY", 
                                                   "REFERENCE", 
                                                   "REMARKS",
                                                   "POLITICAL_UNIT",
                                                   "NAME"], axis=1)
front_variation_nz_drop.rename(columns = {'YEAR':'FRONT_VARIATION_YEAR'}, inplace = True)
front_variation_nz_drop.head()

Unnamed: 0,WGMS_ID,FRONT_VARIATION_YEAR,SURVEY_DATE,REFERENCE_DATE,FRONT_VARIATION,QUALITATIVE_VARIATION
41300,1546,1993,19930215,19890401.0,,+X
41301,1546,1994,19940310,19930215.0,,ST
41302,1546,1995,19950304,19940310.0,,ST
41303,2923,1992,19920407,19870306.0,,-X
41304,2923,1993,19930215,19920407.0,,-X


In [36]:
tmp = front_variation_nz_drop[front_variation_nz_drop["QUALITATIVE_VARIATION"] == "ST"]
tmp["WGMS_ID"].nunique()

70

## 5. MASS_BALANCE:
Glacier mass balance measurements by elevation band.

In [37]:
mass_balance = pd.read_csv("data/wgms/mass_balance.csv")

mass_balance_nz = mass_balance[mass_balance["POLITICAL_UNIT"] == "NZ"]

mass_balance_nz_drop = mass_balance_nz.drop(["LOWER_BOUND", 
                                             "UPPER_BOUND", 
                                             "WINTER_BALANCE_UNC", 
                                             "SUMMER_BALANCE_UNC", 
                                             "ANNUAL_BALANCE_UNC",
                                             "REMARKS",
                                             "POLITICAL_UNIT",
                                             "NAME"], axis=1)
mass_balance_nz_drop.rename(columns = {'YEAR':'MASS_BALANCE_YEAR', 'AREA':'MASS_BALANCE_YEAR_AREA'}, inplace = True)
mass_balance_nz_drop.head()

Unnamed: 0,WGMS_ID,MASS_BALANCE_YEAR,MASS_BALANCE_YEAR_AREA,WINTER_BALANCE,SUMMER_BALANCE,ANNUAL_BALANCE
44238,1597,2005,2.03,2875.0,-1499.0,1376.0
44239,1597,2006,2.03,2248.0,-1557.0,691.0
44240,1597,2007,2.03,3039.0,-2347.0,692.0
44241,1597,2008,2.03,2392.0,-4090.0,-1698.0
44242,1597,2009,2.03,1975.0,-2677.0,-702.0


## 6. SPECIAL_EVENT:
Extraordinary events concerning glacier hazards and dramatic glacier changes.

In [38]:
special_event = pd.read_csv("data/wgms/special_event.csv")

special_event_nz = special_event[special_event["POLITICAL_UNIT"] == "NZ"]

special_event_nz_drop = special_event_nz.drop(["INVESTIGATOR", 
                                               "SPONS_AGENCY", 
                                               "REFERENCE", 
                                               "REMARKS",
                                               "POLITICAL_UNIT",
                                               "NAME",
                                               "EVENT_ID"], axis=1)
special_event_nz_drop.head()

Unnamed: 0,WGMS_ID,EVENT_DATE,ET_SURGE,ET_CALVING,ET_FLOOD,ET_AVALANCHE,ET_TECTONIC,ET_OTHER,EVENT_DESCRIPTION
2906,1580,19920502.0,False,False,False,False,False,True,"The rock avalanche, inspected on 5 May 1992, r..."
2907,1580,19920916.0,False,False,False,False,False,True,"The rock avalanche, inspected on 20 September ..."
2908,1074,19911214.0,False,False,False,False,True,True,Mount Cook Rock Avalanche occurred on 14 Decem...
2909,1074,19949999.0,False,False,False,False,False,True,"During a storm of January 1994, the river brea..."


## 7. RECONSTRUCTION_FRONT_VARIATION:
Glacier length changes reconstructed from historic records and geologic dating.

In [39]:
reconstruction_front_variation = pd.read_csv("data/wgms/reconstruction_front_variation.csv")

reconstruction_front_variation_nz = reconstruction_front_variation[reconstruction_front_variation["POLITICAL_UNIT"] == "NZ"]

reconstruction_front_variation_nz_drop = reconstruction_front_variation_nz.drop(["YEAR_UNC", 
                                                                                 "REF_YEAR_UNC", 
                                                                                 "FRONT_VAR_POS_UNC", 
                                                                                 "FRONT_VAR_NEG_UNC",
                                                                                 "ELEVATION_UNC", 
                                                                                 "METHOD_CODE", 
                                                                                 "METHOD_REMARKS", 
                                                                                 "REMARKS", 
                                                                                 "QUALITATIVE_VARIATION",  # all NaN
                                                                                 "LOWEST_ELEVATION", 
                                                                                 "HIGHEST_ELEVATION",
                                                                                 "MORAINE_DEFINED_MAX",
                                                                                 "POLITICAL_UNIT",
                                                                                 "NAME"], axis=1)
reconstruction_front_variation_nz_drop.rename(columns = {'YEAR':'RECONSTRUCTION_FRONT_VARIATION_YEAR'}, inplace = True)
reconstruction_front_variation_nz_drop.head()

Unnamed: 0,WGMS_ID,REC_SERIES_ID,RECONSTRUCTION_FRONT_VARIATION_YEAR,REFERENCE_YEAR,FRONT_VARIATION
1874,899,36,1780,1600.0,-560.0
1875,899,36,1820,1780.0,141.0
1876,899,36,1865,1820.0,-240.0
1877,899,36,1867,1865.0,-21.0
1878,899,36,1886,1867.0,-29.0


## dataframes after previous steps
- glacier_nz_drop
- state_nz_drop
- change_nz_drop
- front_variation_nz_drop
- mass_balance_nz_drop
- special_event_nz_drop
- reconstruction_front_variation_nz_drop

In [10]:
glacier_nz_drop.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2869 entries, 158448 to 161316
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   POLITICAL_UNIT  2869 non-null   object 
 1   NAME            2869 non-null   object 
 2   WGMS_ID         2869 non-null   int64  
 3   GEN_LOCATION    119 non-null    object 
 4   SPEC_LOCATION   118 non-null    object 
 5   LATITUDE        2869 non-null   float64
 6   LONGITUDE       2869 non-null   float64
 7   PRIM_CLASSIFIC  117 non-null    float64
 8   FORM            118 non-null    float64
 9   FRONTAL_CHARS   118 non-null    float64
 10  EXPOS_ACC_AREA  118 non-null    object 
 11  EXPOS_ABL_AREA  118 non-null    object 
dtypes: float64(5), int64(1), object(6)
memory usage: 291.4+ KB


In [11]:
state_nz_drop.info()

<class 'pandas.core.frame.DataFrame'>
Index: 165 entries, 8947 to 9111
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   POLITICAL_UNIT     165 non-null    object 
 1   WGMS_ID            165 non-null    int64  
 2   STATE_YEAR         165 non-null    int64  
 3   HIGHEST_ELEVATION  141 non-null    float64
 4   MEDIAN_ELEVATION   121 non-null    float64
 5   LOWEST_ELEVATION   159 non-null    float64
 6   LENGTH             127 non-null    float64
 7   AREA               105 non-null    float64
dtypes: float64(5), int64(2), object(1)
memory usage: 11.6+ KB


In [12]:
change_nz_drop.info()

<class 'pandas.core.frame.DataFrame'>
Index: 45643 entries, 814867 to 860509
Data columns (total 6 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   WGMS_ID           45643 non-null  int64  
 1   SURVEY_ID         45643 non-null  int64  
 2   CHANGE_YEAR       45643 non-null  int64  
 3   AREA_SURVEY_YEAR  45643 non-null  float64
 4   THICKNESS_CHG     45643 non-null  float64
 5   VOLUME_CHANGE     13790 non-null  float64
dtypes: float64(3), int64(3)
memory usage: 2.4 MB


In [13]:
front_variation_nz_drop.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1164 entries, 41300 to 42463
Data columns (total 6 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   WGMS_ID                1164 non-null   int64  
 1   FRONT_VARIATION_YEAR   1164 non-null   int64  
 2   SURVEY_DATE            1164 non-null   int64  
 3   REFERENCE_DATE         1163 non-null   float64
 4   FRONT_VARIATION        244 non-null    float64
 5   QUALITATIVE_VARIATION  937 non-null    object 
dtypes: float64(2), int64(3), object(1)
memory usage: 63.7+ KB


In [14]:
mass_balance_nz_drop.info()

<class 'pandas.core.frame.DataFrame'>
Index: 62 entries, 44238 to 44299
Data columns (total 6 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   WGMS_ID                 62 non-null     int64  
 1   MASS_BALANCE_YEAR       62 non-null     int64  
 2   MASS_BALANCE_YEAR_AREA  60 non-null     float64
 3   WINTER_BALANCE          38 non-null     float64
 4   SUMMER_BALANCE          38 non-null     float64
 5   ANNUAL_BALANCE          62 non-null     float64
dtypes: float64(4), int64(2)
memory usage: 3.4 KB


In [15]:
special_event_nz_drop.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 2906 to 2909
Data columns (total 9 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   WGMS_ID            4 non-null      int64  
 1   EVENT_DATE         4 non-null      float64
 2   ET_SURGE           4 non-null      bool   
 3   ET_CALVING         4 non-null      bool   
 4   ET_FLOOD           4 non-null      bool   
 5   ET_AVALANCHE       4 non-null      bool   
 6   ET_TECTONIC        4 non-null      bool   
 7   ET_OTHER           4 non-null      bool   
 8   EVENT_DESCRIPTION  4 non-null      object 
dtypes: bool(6), float64(1), int64(1), object(1)
memory usage: 152.0+ bytes


In [16]:
reconstruction_front_variation_nz_drop.info()

<class 'pandas.core.frame.DataFrame'>
Index: 6 entries, 1874 to 1879
Data columns (total 5 columns):
 #   Column                               Non-Null Count  Dtype  
---  ------                               --------------  -----  
 0   WGMS_ID                              6 non-null      int64  
 1   REC_SERIES_ID                        6 non-null      int64  
 2   RECONSTRUCTION_FRONT_VARIATION_YEAR  6 non-null      int64  
 3   REFERENCE_YEAR                       6 non-null      float64
 4   FRONT_VARIATION                      6 non-null      float64
dtypes: float64(2), int64(3)
memory usage: 288.0 bytes


## Map NZ Glacier

In [40]:
glacier_nz_drop["WGMS_ID"].nunique()

2869

In [42]:
# merge nz glacier that has change record
nz_glacier_change = pd.merge(glacier_nz_drop, change_nz_drop, how="right", on="WGMS_ID")
nz_glacier_change.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45643 entries, 0 to 45642
Data columns (total 18 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   POLITICAL_UNIT    45643 non-null  object 
 1   NAME              45643 non-null  object 
 2   WGMS_ID           45643 non-null  int64  
 3   GEN_LOCATION      5755 non-null   object 
 4   SPEC_LOCATION     5748 non-null   object 
 5   LATITUDE          45643 non-null  float64
 6   LONGITUDE         45643 non-null  float64
 7   PRIM_CLASSIFIC    5473 non-null   float64
 8   FORM              5682 non-null   float64
 9   FRONTAL_CHARS     5682 non-null   float64
 10  EXPOS_ACC_AREA    5682 non-null   object 
 11  EXPOS_ABL_AREA    5682 non-null   object 
 12  SURVEY_ID         45643 non-null  int64  
 13  CHANGE_YEAR       45643 non-null  int64  
 14  SURVEY_DATE       45643 non-null  int64  
 15  AREA_SURVEY_YEAR  45643 non-null  float64
 16  THICKNESS_CHG     45643 non-null  float6

In [52]:
nz_glacier_change[nz_glacier_change["CHANGE_YEAR"] == 2019]["WGMS_ID"].nunique()

2758

In [53]:
nz_glacier_change[nz_glacier_change["CHANGE_YEAR"] == 2009]["WGMS_ID"].nunique()

2767

In [19]:
nz_glacier_change_map = nz_glacier_change[["WGMS_ID", "NAME", "LATITUDE", "LONGITUDE"]]
nz_glacier_change_map.head()

Unnamed: 0,WGMS_ID,NAME,LATITUDE,LONGITUDE
0,2923,ADAMS,-43.32,170.720001
1,2923,ADAMS,-43.32,170.720001
2,2923,ADAMS,-43.32,170.720001
3,2923,ADAMS,-43.32,170.720001
4,2923,ADAMS,-43.32,170.720001


In [20]:
gdf_nz_glacier_change_map = (
    gpd.GeoDataFrame(
        nz_glacier_change_map, geometry=gpd.points_from_xy(nz_glacier_change_map.LONGITUDE, nz_glacier_change_map.LATITUDE), crs="EPSG:4326"
    )
)
gdf_nz_glacier_change_map.head()

Unnamed: 0,WGMS_ID,NAME,LATITUDE,LONGITUDE,geometry
0,2923,ADAMS,-43.32,170.720001,POINT (170.72000 -43.32000)
1,2923,ADAMS,-43.32,170.720001,POINT (170.72000 -43.32000)
2,2923,ADAMS,-43.32,170.720001,POINT (170.72000 -43.32000)
3,2923,ADAMS,-43.32,170.720001,POINT (170.72000 -43.32000)
4,2923,ADAMS,-43.32,170.720001,POINT (170.72000 -43.32000)


In [21]:
# gdf_nz_glacier_change_map.explore()

In [22]:
nz_glacier_change["THICKNESS_CHG"].isna().sum()

0

In [23]:
nz_glacier_change["VOLUME_CHANGE"].isna().sum()

31853

In [24]:
nz_glacier_change["WGMS_ID"].nunique()

2816

In [54]:
nz_glacier_change_non_nan = nz_glacier_change[~nz_glacier_change['VOLUME_CHANGE'].isna()]
nz_glacier_change_non_nan["WGMS_ID"].nunique()

2758

In [55]:
nz_glacier_change_non_nan[nz_glacier_change_non_nan["CHANGE_YEAR"] == 2019]["WGMS_ID"].nunique()

2758

In [56]:
nz_glacier_change_non_nan[nz_glacier_change_non_nan["CHANGE_YEAR"] == 2009]["WGMS_ID"].nunique()

2758

In [60]:
nz_glacier_change_non_nan[nz_glacier_change_non_nan["CHANGE_YEAR"] == 2019]

Unnamed: 0,POLITICAL_UNIT,NAME,WGMS_ID,GEN_LOCATION,SPEC_LOCATION,LATITUDE,LONGITUDE,PRIM_CLASSIFIC,FORM,FRONTAL_CHARS,EXPOS_ACC_AREA,EXPOS_ABL_AREA,SURVEY_ID,CHANGE_YEAR,SURVEY_DATE,AREA_SURVEY_YEAR,THICKNESS_CHG,VOLUME_CHANGE
25,NZ,ADAMS,2923,WANGANUI,ADAMS,-43.320000,170.720001,5.0,1.0,8.0,W,N,754563,2019,20191231,4.279,-14020.0,-60000.0
28,NZ,ADAMS,2923,WANGANUI,ADAMS,-43.320000,170.720001,5.0,1.0,8.0,W,N,754566,2019,20191231,4.279,-6110.0,-26160.0
30,NZ,AILSA,2924,HUMBOLDTS,CAPLES,-44.786100,168.187000,6.0,4.0,4.0,S,S,743698,2019,20191231,0.269,-10060.0,-2700.0
33,NZ,AILSA,2924,HUMBOLDTS,CAPLES,-44.786100,168.187000,6.0,4.0,4.0,S,S,743701,2019,20191231,0.269,-5885.0,-1580.0
341,NZ,ALMER/SALISBURY,1548,WAIHO,WAIHO,-43.470001,170.220001,5.0,1.0,8.0,W,SW,755858,2019,20191231,3.337,-13500.0,-45040.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45546,NZ,WIGLEY,1610,WHATAROA,WHATAROA,-43.419998,170.350006,6.0,9.0,0.0,NE,N,750871,2019,20191231,1.428,-3110.0,-4445.0
45550,NZ,WILKINSON,1615,WHITCOMBE,WILKINSON,-43.193199,170.940002,6.0,2.0,4.0,NE,NE,754678,2019,20191231,3.528,-11760.0,-41540.0
45553,NZ,WILKINSON,1615,WHITCOMBE,WILKINSON,-43.193199,170.940002,6.0,2.0,4.0,NE,NE,754681,2019,20191231,3.528,-5850.0,-20645.0
45639,NZ,ZORA,1593,LANDSBOROUGH,HAAST,-43.750000,169.830002,6.0,2.0,8.0,S,S,748488,2019,20191231,4.704,-10840.0,-51080.0


In [27]:
nz_glacier_change_non_nan.to_csv("nz_glacier_change_non_nan.csv")

# New Merge
only change and glacier location info

In [10]:
pd.set_option('display.max_columns', None)

In [11]:
change_nz_drop.info()

<class 'pandas.core.frame.DataFrame'>
Index: 45643 entries, 814867 to 860509
Data columns (total 6 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   WGMS_ID           45643 non-null  int64  
 1   CHANGE_YEAR       45643 non-null  int64  
 2   SURVEY_DATE       45643 non-null  int64  
 3   AREA_SURVEY_YEAR  45643 non-null  float64
 4   THICKNESS_CHG     45643 non-null  float64
 5   VOLUME_CHANGE     13790 non-null  float64
dtypes: float64(3), int64(3)
memory usage: 2.4 MB


In [104]:
# What is the best way to fill the nan
# change_nz_drop["VOLUME_CHANGE"] = change_nz_drop["VOLUME_CHANGE"].fillna(0)

In [12]:
merge_1 = pd.merge(change_nz_drop, glacier_nz_drop, how="left", on="WGMS_ID")
merge_1.head()

Unnamed: 0,WGMS_ID,CHANGE_YEAR,SURVEY_DATE,AREA_SURVEY_YEAR,THICKNESS_CHG,VOLUME_CHANGE,NAME,LATITUDE,LONGITUDE,PRIM_CLASSIFIC,FORM,FRONTAL_CHARS,EXPOS_ACC_AREA,EXPOS_ABL_AREA
0,2923,2013,20130228,4.279,5913.0,,ADAMS,-43.32,170.720001,5.0,1.0,8.0,W,N
1,2923,2018,20180307,4.279,-14433.0,,ADAMS,-43.32,170.720001,5.0,1.0,8.0,W,N
2,2923,2018,20180307,4.279,-13184.0,,ADAMS,-43.32,170.720001,5.0,1.0,8.0,W,N
3,2923,2014,20140224,4.279,-7845.0,,ADAMS,-43.32,170.720001,5.0,1.0,8.0,W,N
4,2923,2017,20170304,4.279,-7478.0,,ADAMS,-43.32,170.720001,5.0,1.0,8.0,W,N


In [13]:
new_column_names = ['WGMS_ID', 'CHANGE_YEAR', 'SURVEY_DATE', 'AREA_OF_SURVEY_YEAR',
                   'CHANGE_THICKNESS', 'CHANGE_VOLUME', 'NAME', 'LATITUDE', 'LONGITUDE',
                   'FEATURE_PRIM_CLASS', 'FEATURE_FORM', 'FEATURE_FRONTAL_CHARS', 'DIRECTION_ACCUMULATION', 'DIRECTION_ABLATION']
merge_1.columns = new_column_names

In [14]:
merge_1 = merge_1.iloc[:,[0,6,1,2,3,4,5,7,8,9,10,11,12,13]]

In [15]:
merge_1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45643 entries, 0 to 45642
Data columns (total 14 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   WGMS_ID                 45643 non-null  int64  
 1   NAME                    45643 non-null  object 
 2   CHANGE_YEAR             45643 non-null  int64  
 3   SURVEY_DATE             45643 non-null  int64  
 4   AREA_OF_SURVEY_YEAR     45643 non-null  float64
 5   CHANGE_THICKNESS        45643 non-null  float64
 6   CHANGE_VOLUME           13790 non-null  float64
 7   LATITUDE                45643 non-null  float64
 8   LONGITUDE               45643 non-null  float64
 9   FEATURE_PRIM_CLASS      5473 non-null   float64
 10  FEATURE_FORM            5682 non-null   float64
 11  FEATURE_FRONTAL_CHARS   5682 non-null   float64
 12  DIRECTION_ACCUMULATION  5682 non-null   object 
 13  DIRECTION_ABLATION      5682 non-null   object 
dtypes: float64(8), int64(3), object(3)
mem

In [16]:
merge_1.head()

Unnamed: 0,WGMS_ID,NAME,CHANGE_YEAR,SURVEY_DATE,AREA_OF_SURVEY_YEAR,CHANGE_THICKNESS,CHANGE_VOLUME,LATITUDE,LONGITUDE,FEATURE_PRIM_CLASS,FEATURE_FORM,FEATURE_FRONTAL_CHARS,DIRECTION_ACCUMULATION,DIRECTION_ABLATION
0,2923,ADAMS,2013,20130228,4.279,5913.0,,-43.32,170.720001,5.0,1.0,8.0,W,N
1,2923,ADAMS,2018,20180307,4.279,-14433.0,,-43.32,170.720001,5.0,1.0,8.0,W,N
2,2923,ADAMS,2018,20180307,4.279,-13184.0,,-43.32,170.720001,5.0,1.0,8.0,W,N
3,2923,ADAMS,2014,20140224,4.279,-7845.0,,-43.32,170.720001,5.0,1.0,8.0,W,N
4,2923,ADAMS,2017,20170304,4.279,-7478.0,,-43.32,170.720001,5.0,1.0,8.0,W,N


# GLIMS

In [17]:
gla_glims = gpd.read_file('data/glims/nz_glaciers_polygons.shp')

In [18]:
gla_glims.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 9032 entries, 0 to 9031
Data columns (total 39 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   line_type   9032 non-null   object  
 1   anlys_id    9032 non-null   float64 
 2   glac_id     9032 non-null   object  
 3   anlys_time  9032 non-null   object  
 4   area        9032 non-null   float64 
 5   db_area     9032 non-null   float64 
 6   width       9032 non-null   float64 
 7   length      9032 non-null   float64 
 8   primeclass  9032 non-null   float64 
 9   min_elev    9032 non-null   float64 
 10  mean_elev   9032 non-null   float64 
 11  max_elev    9032 non-null   float64 
 12  src_date    9032 non-null   object  
 13  rec_status  9032 non-null   object  
 14  glac_name   9032 non-null   object  
 15  wgms_id     9032 non-null   object  
 16  local_id    9032 non-null   object  
 17  glac_stat   9032 non-null   object  
 18  gone_date   9032 non-null   object  
 19

In [19]:
gla_glims["src_date"].value_counts()

src_date
2019-03-28T00:00:00    4077
2016-03-11T00:00:00    3950
2009-02-17T22:38:29     604
1600-09-09T00:00:00     401
Name: count, dtype: int64

In [20]:
columns_to_drop = [
    'wgms_id', 'local_id', 'glac_stat', 'gone_date', 'gone_dt_e',
    'subm_id', 'rc_id', 'chief_affl', 'conn_lvl', 'surge_type',
    'term_type', 'gtng_o1reg', 'gtng_o2reg', 'rgi_gl_typ', 'loc_unc_x',
    'loc_unc_y', 'glob_unc_x', 'glob_unc_y', 'submitters', 'analysts',
    'mean_elev', 'max_elev', 'width', 'length', 'primeclass',
    'rec_status', 'proc_desc',
    'anlys_id', 'anlys_time', 'glac_name', 'release_dt', 'geog_area'
]
gla_glims_cleaned = gla_glims.drop(columns=columns_to_drop)
gla_glims_cleaned

Unnamed: 0,line_type,glac_id,area,db_area,min_elev,src_date,geometry
0,glac_bound,G170910E43118S,3.916670,3.916960,0.0,1600-09-09T00:00:00,"POLYGON Z ((170.91298 -43.11761 0.00000, 170.9..."
1,glac_bound,G172719E42080S,0.530637,0.531023,0.0,1600-09-09T00:00:00,"POLYGON Z ((172.72521 -42.07704 0.00000, 172.7..."
2,glac_bound,G169791E43889S,1.520170,1.518870,0.0,1600-09-09T00:00:00,"POLYGON Z ((169.78505 -43.88165 0.00000, 169.7..."
3,glac_bound,G168618E44432S,1.266290,1.263470,0.0,1600-09-09T00:00:00,"POLYGON Z ((168.62173 -44.43379 0.00000, 168.6..."
4,glac_bound,G170012E43673S,70.890000,0.708464,0.0,2009-02-17T22:38:29,"POLYGON Z ((170.00462 -43.66741 0.00000, 170.0..."
...,...,...,...,...,...,...,...
9027,glac_bound,G171135E43078S,0.782568,0.782763,0.0,1600-09-09T00:00:00,"POLYGON Z ((171.13377 -43.07421 0.00000, 171.1..."
9028,glac_bound,G170930E43172S,3.738290,3.738670,0.0,1600-09-09T00:00:00,"POLYGON Z ((170.93579 -43.18184 0.00000, 170.9..."
9029,glac_bound,G169768E43774S,1.871620,1.869940,0.0,1600-09-09T00:00:00,"POLYGON Z ((169.75320 -43.77087 0.00000, 169.7..."
9030,glac_bound,G168427E44498S,0.764732,0.762872,0.0,1600-09-09T00:00:00,"POLYGON Z ((168.43220 -44.49552 0.00000, 168.4..."


In [21]:
gla_glims_cleaned.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 9032 entries, 0 to 9031
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype   
---  ------     --------------  -----   
 0   line_type  9032 non-null   object  
 1   glac_id    9032 non-null   object  
 2   area       9032 non-null   float64 
 3   db_area    9032 non-null   float64 
 4   min_elev   9032 non-null   float64 
 5   src_date   9032 non-null   object  
 6   geometry   9032 non-null   geometry
dtypes: float64(3), geometry(1), object(3)
memory usage: 494.1+ KB


In [22]:
gla_glims_cleaned['min_elev'].value_counts()

min_elev
0.0       5082
799.0      107
594.0       36
1080.0      29
917.0       25
          ... 
1672.0       1
1405.0       1
1269.0       1
1712.0       1
2203.0       1
Name: count, Length: 1051, dtype: int64

In [23]:
gla_glims_cleaned["glac_id"].nunique()

4174

In [24]:
gla_glims_cleaned.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [25]:
gla_glims_cleaned["year"] = gla_glims_cleaned['src_date'].str[:4].astype(int)
gla_glims_cleaned["month"] = gla_glims_cleaned['src_date'].str[5:7].astype(int)
gla_glims_cleaned["day"] = gla_glims_cleaned['src_date'].str[8:10].astype(int)
gla_glims_cleaned = gla_glims_cleaned.drop(["src_date"], axis=1)
gla_glims_cleaned.head()

Unnamed: 0,line_type,glac_id,area,db_area,min_elev,geometry,year,month,day
0,glac_bound,G170910E43118S,3.91667,3.91696,0.0,"POLYGON Z ((170.91298 -43.11761 0.00000, 170.9...",1600,9,9
1,glac_bound,G172719E42080S,0.530637,0.531023,0.0,"POLYGON Z ((172.72521 -42.07704 0.00000, 172.7...",1600,9,9
2,glac_bound,G169791E43889S,1.52017,1.51887,0.0,"POLYGON Z ((169.78505 -43.88165 0.00000, 169.7...",1600,9,9
3,glac_bound,G168618E44432S,1.26629,1.26347,0.0,"POLYGON Z ((168.62173 -44.43379 0.00000, 168.6...",1600,9,9
4,glac_bound,G170012E43673S,70.89,0.708464,0.0,"POLYGON Z ((170.00462 -43.66741 0.00000, 170.0...",2009,2,17


In [26]:
gla_glims_cleaned_2016 = gla_glims_cleaned[gla_glims_cleaned['year'] == 2016]

In [27]:
gla_glims_cleaned_2016["line_type"]

642     glac_bound
643     glac_bound
644     glac_bound
645     glac_bound
646     glac_bound
           ...    
8778    glac_bound
8779    glac_bound
8780    glac_bound
8781    glac_bound
8782    glac_bound
Name: line_type, Length: 3950, dtype: object

In [28]:
gla_glims_cleaned_2019 = gla_glims_cleaned[gla_glims_cleaned['year'] == 2019]

# Spatial Join

In [29]:
merge_1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45643 entries, 0 to 45642
Data columns (total 14 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   WGMS_ID                 45643 non-null  int64  
 1   NAME                    45643 non-null  object 
 2   CHANGE_YEAR             45643 non-null  int64  
 3   SURVEY_DATE             45643 non-null  int64  
 4   AREA_OF_SURVEY_YEAR     45643 non-null  float64
 5   CHANGE_THICKNESS        45643 non-null  float64
 6   CHANGE_VOLUME           13790 non-null  float64
 7   LATITUDE                45643 non-null  float64
 8   LONGITUDE               45643 non-null  float64
 9   FEATURE_PRIM_CLASS      5473 non-null   float64
 10  FEATURE_FORM            5682 non-null   float64
 11  FEATURE_FRONTAL_CHARS   5682 non-null   float64
 12  DIRECTION_ACCUMULATION  5682 non-null   object 
 13  DIRECTION_ABLATION      5682 non-null   object 
dtypes: float64(8), int64(3), object(3)
mem

In [41]:
gdf_wgms = (
    gpd.GeoDataFrame(
        merge_1, geometry=gpd.points_from_xy(merge_1.LONGITUDE, merge_1.LATITUDE), crs="EPSG:4326"
    )
)

gdf_wgms.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 45643 entries, 0 to 45642
Data columns (total 15 columns):
 #   Column                  Non-Null Count  Dtype   
---  ------                  --------------  -----   
 0   WGMS_ID                 45643 non-null  int64   
 1   NAME                    45643 non-null  object  
 2   CHANGE_YEAR             45643 non-null  int64   
 3   SURVEY_DATE             45643 non-null  int64   
 4   AREA_OF_SURVEY_YEAR     45643 non-null  float64 
 5   CHANGE_THICKNESS        45643 non-null  float64 
 6   CHANGE_VOLUME           13790 non-null  float64 
 7   LATITUDE                45643 non-null  float64 
 8   LONGITUDE               45643 non-null  float64 
 9   FEATURE_PRIM_CLASS      5473 non-null   float64 
 10  FEATURE_FORM            5682 non-null   float64 
 11  FEATURE_FRONTAL_CHARS   5682 non-null   float64 
 12  DIRECTION_ACCUMULATION  5682 non-null   object  
 13  DIRECTION_ABLATION      5682 non-null   object  
 14  geometry      

In [31]:
gdf_wgms.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [32]:
gla_glims_cleaned.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [46]:
wgms_glims =  gla_glims_cleaned.sjoin(gdf_wgms, predicate="intersects")

In [47]:
wgms_glims = wgms_glims.drop(["index_right"], axis=1)
wgms_glims = wgms_glims.reset_index()

In [48]:
wgms_glims.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 93065 entries, 0 to 93064
Data columns (total 24 columns):
 #   Column                  Non-Null Count  Dtype   
---  ------                  --------------  -----   
 0   index                   93065 non-null  int64   
 1   line_type               93065 non-null  object  
 2   glac_id                 93065 non-null  object  
 3   area                    93065 non-null  float64 
 4   db_area                 93065 non-null  float64 
 5   min_elev                93065 non-null  float64 
 6   geometry                93065 non-null  geometry
 7   year                    93065 non-null  int32   
 8   month                   93065 non-null  int32   
 9   day                     93065 non-null  int32   
 10  WGMS_ID                 93065 non-null  int64   
 11  NAME                    93065 non-null  object  
 12  CHANGE_YEAR             93065 non-null  int64   
 13  SURVEY_DATE             93065 non-null  int64   
 14  AREA_OF_SURVEY

In [50]:
wgms_glims

Unnamed: 0,index,line_type,glac_id,area,db_area,min_elev,geometry,year,month,day,WGMS_ID,NAME,CHANGE_YEAR,SURVEY_DATE,AREA_OF_SURVEY_YEAR,CHANGE_THICKNESS,CHANGE_VOLUME,LATITUDE,LONGITUDE,FEATURE_PRIM_CLASS,FEATURE_FORM,FEATURE_FRONTAL_CHARS,DIRECTION_ACCUMULATION,DIRECTION_ABLATION
0,0,glac_bound,G170910E43118S,3.91667,3.91696,0.0,"POLYGON Z ((170.91298 -43.11761 0.00000, 170.9...",1600,9,9,146277,RGI60-18.03359,2004,20041231,0.081,-535.0,-40.0,-43.1210,170.905000,,,,,
1,0,glac_bound,G170910E43118S,3.91667,3.91696,0.0,"POLYGON Z ((170.91298 -43.11761 0.00000, 170.9...",1600,9,9,146277,RGI60-18.03359,2019,20191231,0.081,-2055.0,-165.0,-43.1210,170.905000,,,,,
2,0,glac_bound,G170910E43118S,3.91667,3.91696,0.0,"POLYGON Z ((170.91298 -43.11761 0.00000, 170.9...",1600,9,9,146277,RGI60-18.03359,2014,20141231,0.081,-1150.0,-90.0,-43.1210,170.905000,,,,,
3,0,glac_bound,G170910E43118S,3.91667,3.91696,0.0,"POLYGON Z ((170.91298 -43.11761 0.00000, 170.9...",1600,9,9,146277,RGI60-18.03359,2009,20091231,0.081,-2115.0,-170.0,-43.1210,170.905000,,,,,
4,0,glac_bound,G170910E43118S,3.91667,3.91696,0.0,"POLYGON Z ((170.91298 -43.11761 0.00000, 170.9...",1600,9,9,146277,RGI60-18.03359,2019,20191231,0.081,-5860.0,-460.0,-43.1210,170.905000,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93060,9031,glac_bound,G168412E44499S,1.79226,1.78786,0.0,"POLYGON Z ((168.42216 -44.49611 0.00000, 168.4...",1600,9,9,21716,UNNAMED 21716,2013,20130415,0.585,-6698.0,,-44.4991,168.412003,,,,,
93061,9031,glac_bound,G168412E44499S,1.79226,1.78786,0.0,"POLYGON Z ((168.42216 -44.49611 0.00000, 168.4...",1600,9,9,21716,UNNAMED 21716,2017,20170104,0.585,-8289.0,,-44.4991,168.412003,,,,,
93062,9031,glac_bound,G168412E44499S,1.79226,1.78786,0.0,"POLYGON Z ((168.42216 -44.49611 0.00000, 168.4...",1600,9,9,21716,UNNAMED 21716,2012,20120123,0.585,-7963.0,,-44.4991,168.412003,,,,,
93063,9031,glac_bound,G168412E44499S,1.79226,1.78786,0.0,"POLYGON Z ((168.42216 -44.49611 0.00000, 168.4...",1600,9,9,21716,UNNAMED 21716,2017,20170104,0.585,-6355.0,,-44.4991,168.412003,,,,,


In [58]:
# The following col might contain useful information, but they have missing values
# 16  CHANGE_VOLUME           14130 non-null  float64
# 19  FEATURE_PRIM_CLASS      17198 non-null  float64# 20  FEATURE_FORM            18034 non-null  float6
# 21  FEATURE_FRONTAL_CHARS   18034 non-null  float
#  22  DIRECTION_ACCUMULAIONN  18034 non-null  obect
# 
 23  DIRECTION_ABLATION      18034 non-null  jectct  

IndentationError: unexpected indent (2186255304.py, line 8)