In [2]:
from os.path import join, expanduser
import os
import boto3
from rasterstats import zonal_stats
import geopandas as gpd
from botocore.config import Config
from botocore import UNSIGNED
import rasterio as rio
import geopandas as gpd
from rasterio import features
from rasterio.plot import show
from affine import Affine
from shapely.geometry import box
import pandas as pd
from tqdm import tqdm

In [3]:
adm1 = gpd.read_file(join(expanduser("~"), 'data', 'pacific', 'admin', 'PIC_adm1.shp'))
adm2 = gpd.read_file(join(expanduser("~"), 'data', 'pacific', 'admin', 'PIC_adm2.shp'))

In [4]:
out_dir = join(expanduser("~"), 'data', 'pacific', 'output', 'climate')
if not os.path.exists(out_dir):
    os.mkdir(out_dir)

In [5]:
os.listdir(out_dir)

['adm1_cdd_monthly_max_1mm.csv',
 'adm1_cdd_monthly_max_5mm.csv',
 'adm1_cwd_monthly_max_1mm.csv',
 'adm1_cwd_monthly_max_5mm.csv',
 'adm1_drydays_monthly_sum_1mm.csv',
 'adm1_drydays_monthly_sum_5mm.csv',
 'adm1_wetdays_monthly_sum_1mm.csv',
 'adm1_spei03.csv',
 'adm1_spei12.csv',
 'adm1_spi03.csv',
 'adm1_spi12.csv',
 'adm1_wetdays_monthly_sum_5mm.csv']

In [6]:
dfs = [pd.read_csv(join(out_dir, f), index_col=0) for f in os.listdir(out_dir)]

In [7]:
def cleanDate(df):
    df.loc[:, "date"] = df.loc[:, "date"].astype('str')
    df.loc[:, "date"] = df.loc[:, "date"].str.replace('-', '')
    return df

In [8]:
dfs = [cleanDate(df) for df in dfs]

In [9]:
df_master = dfs[0].copy()

In [10]:
df_master.drop(['ADM0_NAME', 'ADM0_PCODE', 'ADM1_NAME', 'ADM1_PCODE'], axis=1, inplace=True)

In [11]:
dfs.pop(0)

Unnamed: 0,index,ADM0_NAME,ADM0_PCODE,ADM1_NAME,ADM1_PCODE,cdd_monthly_max_1mm_max,cdd_monthly_max_1mm_mean,cdd_monthly_max_1mm_std,cdd_monthly_max_1mm_median,date
0,0,Papua New Guinea,PG,Autonomous Region of Bougainville,PG20,8.0,3.077922,1.192686,3.0,200006
1,0,Papua New Guinea,PG,Autonomous Region of Bougainville,PG20,6.0,3.233766,1.079876,3.0,200007
2,0,Papua New Guinea,PG,Autonomous Region of Bougainville,PG20,7.0,3.402597,0.856749,3.0,200008
3,0,Papua New Guinea,PG,Autonomous Region of Bougainville,PG20,11.0,6.298701,2.439209,6.0,200009
4,0,Papua New Guinea,PG,Autonomous Region of Bougainville,PG20,5.0,2.805195,0.912239,3.0,200010
...,...,...,...,...,...,...,...,...,...,...
33924,130,Samoa,WS,Vaisigano,WSM-ADM1-40696546B87181747,10.0,9.000000,1.000000,9.0,202108
33925,130,Samoa,WS,Vaisigano,WSM-ADM1-40696546B87181747,18.0,15.500000,2.500000,15.5,202109
33926,130,Samoa,WS,Vaisigano,WSM-ADM1-40696546B87181747,5.0,4.500000,0.500000,4.5,202110
33927,130,Samoa,WS,Vaisigano,WSM-ADM1-40696546B87181747,3.0,2.000000,1.000000,2.0,202111


In [12]:
def mergeDf(df):
    global df_master
    df_master = df_master.merge(df, on=['index', 'date'], how='outer') # outer

In [13]:
[mergeDf(df) for df in dfs]

[None, None, None, None, None, None, None, None, None, None, None]

In [14]:
adm1[['ADM0_NAME', 'ADM0_PCODE', 'ADM1_NAME', 'ADM1_PCODE']]

Unnamed: 0,ADM0_NAME,ADM0_PCODE,ADM1_NAME,ADM1_PCODE
0,Papua New Guinea,PG,Autonomous Region of Bougainville,PG20
1,Papua New Guinea,PG,Central Province,PG03
2,Papua New Guinea,PG,Chimbu (Simbu) Province,PG10
3,Papua New Guinea,PG,East New Britain Province,PG18
4,Papua New Guinea,PG,East Sepik Province,PG14
...,...,...,...,...
126,Samoa,WS,Palauli,WSM-ADM1-40696546B22381772
127,Samoa,WS,Satupa'itea,WSM-ADM1-40696546B90588116
128,Samoa,WS,Tuamasaga,WSM-ADM1-40696546B95868609
129,Samoa,WS,Va'a-o-Fonoti,WSM-ADM1-40696546B68230151


In [15]:
df_master = df_master.merge(adm1[['ADM0_NAME', 'ADM0_PCODE', 'ADM1_NAME', 'ADM1_PCODE']], left_on="index", right_index=True)

In [19]:
df_master.sort_values(['ADM0_NAME', 'ADM1_NAME', 'date'], ascending=True, inplace=True)

In [20]:
out_dir

'/home/wb514197/data/pacific/output/climate'

In [21]:
df_master.loc[:, "year"] = df_master.loc[:, "date"].str.slice(start=0, stop=4).astype('int')
df_master.loc[:, "month"] = df_master.loc[:, "date"].str.slice(start=4, stop=6).astype('int')

In [26]:
df_master = df_master.loc[df_master.year>=1981].copy()

Reorder columns

In [32]:
df_master = df_master[['index', 'ADM0_NAME', 'ADM0_PCODE','ADM1_NAME', 'ADM1_PCODE', 'year', 'month', 'date',
        'cdd_monthly_max_1mm_max', 'cdd_monthly_max_1mm_mean',
       'cdd_monthly_max_1mm_std', 'cdd_monthly_max_1mm_median',
       'cdd_monthly_max_5mm_max', 'cdd_monthly_max_5mm_mean',
       'cdd_monthly_max_5mm_std', 'cdd_monthly_max_5mm_median',
       'cwd_monthly_max_1mm_max', 'cwd_monthly_max_1mm_mean',
       'cwd_monthly_max_1mm_std', 'cwd_monthly_max_1mm_median',
       'cwd_monthly_max_5mm_max', 'cwd_monthly_max_5mm_mean',
       'cwd_monthly_max_5mm_std', 'cwd_monthly_max_5mm_median',
       'drydays_monthly_sum_1mm_max', 'drydays_monthly_sum_1mm_mean',
       'drydays_monthly_sum_1mm_std', 'drydays_monthly_sum_1mm_median',
       'drydays_monthly_sum_5mm_max', 'drydays_monthly_sum_5mm_mean',
       'drydays_monthly_sum_5mm_std', 'drydays_monthly_sum_5mm_median',
       'wetdays_monthly_sum_1mm_max', 'wetdays_monthly_sum_1mm_mean',
       'wetdays_monthly_sum_1mm_std', 'wetdays_monthly_sum_1mm_median',
       'spei03_median', 'spei03_max', 'spei03_mean', 'spei03_std',
       'spei12_median', 'spei12_max', 'spei12_mean', 'spei12_std', 'spi03_max',
       'spi03_mean', 'spi03_std', 'spi03_median', 'spi12_max', 'spi12_mean',
       'spi12_std', 'spi12_median', 'wetdays_monthly_sum_5mm_max',
       'wetdays_monthly_sum_5mm_mean', 'wetdays_monthly_sum_5mm_std',
       'wetdays_monthly_sum_5mm_median']]

In [33]:
df_master.to_csv('/home/wb514197/data/pacific/output/adm1_climate_full.csv', index=False)