# Preparation

In [None]:
!pip install netcdf4

In [None]:
!pip install swifter

In [None]:
!pip install rasterio

In [None]:
!pip install earthpy

In [None]:
import datetime as dt  
import pandas as pd
import numpy as np
from netCDF4 import Dataset  # http://code.google.com/p/netcdf4-python/
import matplotlib.pyplot as plt
import urllib.request
from tqdm import tqdm_notebook
import swifter
import pickle
import os
import rasterio as rio
import earthpy as et

In [None]:
# Working directory used below for the eBird input CSV and for the pickled/CSV outputs.
# NOTE(review): looks like a mounted Google Drive folder in Colab — confirm the drive is mounted before running.
PATH= os.path.join("/content", "drive", "My Drive", "Colab Notebooks", "dissertation")

## Select species

In [None]:
# file_id and bc_id are combined into the eBird file name
# "ebd_<file_id>_bcr<bc_id>_zf.csv"; file_id is also appended to every output file name.
# Exactly one pair should be active; the commented pairs are the alternatives.
file_id="nutwoo"
bc_id=32

# file_id="recwoo"
# bc_id=27

# file_id="lewwoo"
# bc_id="9n10"

# Download climatic data

## Prepare the long term climatic data (Bioclimatic data (30 years))

In [None]:
# Fetch the WorldClim 2.1 bioclimatic archive (30-year data) into the working directory.
bio_archive_url = 'https://biogeo.ucdavis.edu/data/worldclim/v2.1/base/wc2.1_10m_bio.zip'
bio_archive_name = 'wc2.1_10m_bio.zip'

print('Start')
urllib.request.urlretrieve(bio_archive_url, bio_archive_name)
print('End')

In [None]:
import zipfile

# Unpack every member of the bioclimatic archive into the current directory.
with zipfile.ZipFile('wc2.1_10m_bio.zip', mode='r') as bio_archive:
    bio_archive.extractall()

In [None]:
# Open the first bioclimatic raster as a sanity check of the extracted archive.
src = rio.open('/content/wc2.1_10m_bio_1.tif')
# Shows whether the handle is closed; it stays open because the next cells read from it.
src.closed

In [None]:
# Render band 1 of the opened raster as an image for a visual check.
plt.imshow(src.read(1))

In [None]:
# Display the raster metadata of the opened dataset.
src.meta

## Prepare the 30-year average climate data for each month

In [None]:
#Download
# Fetch one WorldClim 2.1 archive per monthly climate variable.
# HTTPS is used so these requests match the bioclimatic download from the same
# host and are not sent over an unauthenticated plain-HTTP connection.
WORLDCLIM_BASE_URL = 'https://biogeo.ucdavis.edu/data/worldclim/v2.1/base/'

print('Download Start')
for var in ('tmin', 'tmax', 'tavg', 'prec', 'srad', 'wind', 'vapr'):
    archive_name = 'wc2.1_10m_' + var + '.zip'
    urllib.request.urlretrieve(WORLDCLIM_BASE_URL + archive_name, archive_name)
print('Download End')

In [None]:
import zipfile

# Unpack the seven monthly-variable archives, in the same order they were downloaded.
for var in ('tmin', 'tmax', 'tavg', 'prec', 'srad', 'wind', 'vapr'):
    with zipfile.ZipFile('wc2.1_10m_' + var + '.zip', 'r') as zip_ref:
        zip_ref.extractall()

## Prepare the short term climatic data

In [None]:
# Download the yearly NOAA CPC NetCDF files (precip, tmax, tmin) for 2005-2019
# into the working directory.
noaa_sources = (('cpc_global_precip', 'precip'),
                ('cpc_global_temp', 'tmax'),
                ('cpc_global_temp', 'tmin'))

print('Start')
for year in map(str, range(2005, 2020)):
    print(f'Downloading {year}')
    for remote_dir, var_name in noaa_sources:
        nc_name = f'{var_name}.{year}.nc'
        # To save to Google Drive instead, use PATH+'/'+nc_name as the destination.
        urllib.request.urlretrieve(
            f'ftp://ftp.cdc.noaa.gov/Datasets/{remote_dir}/{nc_name}', nc_name)
print('End')

# Load eBird data

In [None]:
# Load the eBird table for the selected species/region from PATH; the file name is
# built from file_id and bc_id as "ebd_<file_id>_bcr<bc_id>_zf.csv".
tmp=pd.read_csv(os.path.join(PATH,"ebd_"+file_id+"_bcr"+str(bc_id)+"_zf.csv"))

# Data modification for the short term climatic data

In [None]:
# Fetch tmin.2019.nc, the file whose lat/lon axes are read as the reference grid below.
# NOTE(review): the 2005-2019 download loop earlier in the notebook retrieves the same file — confirm whether this cell is still needed.
urllib.request.urlretrieve('ftp://ftp.cdc.noaa.gov/Datasets/cpc_global_temp/tmin.2019.nc', 'tmin.2019.nc')

In [None]:
# Open the reference file first, then one read-only handle per year (2005-2019)
# for each short-term variable.
years = range(2005, 2020)

file = Dataset('tmin.2019.nc', 'r')
ls_precip_files = [Dataset(f'precip.{y}.nc', 'r') for y in years]
ls_tmax_files = [Dataset(f'tmax.{y}.nc', 'r') for y in years]
ls_tmin_files = [Dataset(f'tmin.{y}.nc', 'r') for y in years]

In [None]:
ls_lon=file.variables['lon'][:]
ls_lat=file.variables['lat'][:]

print('Step1')
res1 = dict(zip(ls_lat, np.arange(len(ls_lat)))) 
res2 = dict(zip(ls_lon, np.arange(len(ls_lon)))) 

print('Step2')
tmp['year_month']=tmp['observation_date'].apply(lambda x: x.replace('-','')[:-2])
tmp['month']=tmp['observation_date'].apply(lambda x: int(x.replace('-','')[4:-2]))##<----Added!!!
tmp['day']=tmp['observation_date'].apply(lambda x: int(x.replace('-','')[-2:]))
tmp['longitude360']=tmp['longitude']%360


print('Step2.1')
lat_max=tmp['latitude'].max()
lat_max_close=min(res1, key=lambda y:abs(y-lat_max))
lat_max_close_num=res1[min(res1, key=lambda y:abs(y-lat_max))]

lat_min=tmp['latitude'].min()
lat_min_close=min(res1, key=lambda y:abs(y-lat_min))
lat_min_close_num=res1[min(res1, key=lambda y:abs(y-lat_min))]

tmp['lat_cate']=pd.cut(tmp.latitude, bins=np.arange(lat_min_close-0.25, lat_max_close+0.25+0.5,0.5))
tmp['lat_num']=pd.cut(tmp.latitude, bins=np.arange(lat_min_close-0.25, lat_max_close+0.25+0.5,0.5),labels=np.arange(lat_min_close_num, lat_max_close_num-1,-1))

print('Step2.2')
lon_max=tmp['longitude360'].max()
lon_max_close=min(res2, key=lambda y:abs(y-lon_max))
lon_max_close_num=res2[min(res2, key=lambda y:abs(y-lon_max))]

lon_min=tmp['longitude360'].min()
lon_min_close=min(res2, key=lambda y:abs(y-lon_min))
lon_min_close_num=res2[min(res2, key=lambda y:abs(y-lon_min))]

tmp['lon_cate']=pd.cut(tmp.longitude360, bins=np.arange(lon_min_close-0.25, lon_max_close+0.25+0.5,0.5))
tmp['lon_num']=pd.cut(tmp.longitude360, bins=np.arange(lon_min_close-0.25, lon_max_close+0.25+0.5,0.5),labels=np.arange(lon_min_close_num, lon_max_close_num+1,1))

print('Step3')
tmp['day_since_2009']=int(99999)
tmp.loc[tmp['year']<=2012,'day_since_2009']=(tmp['day_of_year']+(tmp['year']-2009)*365)[tmp['year']<=2012]
tmp.loc[(tmp['year']<=2016)&(tmp['year']>2012),'day_since_2009']=(tmp['day_of_year']+(tmp['year']-2009)*365+1)[(tmp['year']<=2016)&(tmp['year']>2012)]
tmp.loc[(tmp['year']>2016),'day_since_2009']=(tmp['day_of_year']+(tmp['year']-2009)*365+2)[tmp['year']>2016]

tmp['day_since_2007']=int(99999)
tmp.loc[tmp['year']<=2012,'day_since_2007']=(tmp['day_of_year']+(tmp['year']-2009)*365)[tmp['year']<=2012]+731
tmp.loc[(tmp['year']<=2016)&(tmp['year']>2012),'day_since_2007']=(tmp['day_of_year']+(tmp['year']-2009)*365+1)[(tmp['year']<=2016)&(tmp['year']>2012)]+731
tmp.loc[(tmp['year']>2016),'day_since_2007']=(tmp['day_of_year']+(tmp['year']-2009)*365+2)[tmp['year']>2016]+731

tmp['day_since_2005']=int(99999)
tmp.loc[tmp['year']<=2012,'day_since_2005']=(tmp['day_of_year']+(tmp['year']-2009)*365)[tmp['year']<=2012]+731+730
tmp.loc[(tmp['year']<=2016)&(tmp['year']>2012),'day_since_2005']=(tmp['day_of_year']+(tmp['year']-2009)*365+1)[(tmp['year']<=2016)&(tmp['year']>2012)]+731+730
tmp.loc[(tmp['year']>2016),'day_since_2005']=(tmp['day_of_year']+(tmp['year']-2009)*365+2)[tmp['year']>2016]+731+730

print('Step4')
res1_reverse={v:float(k) for k, v in res1.items()}
res2_reverse={v:float(k) for k, v in res2.items()}

tmp['lat_cate_val']=tmp['lat_num'].apply(lambda x:res1_reverse[x])
tmp=tmp.astype({'lat_cate_val': 'float64'})

tmp['lon_cate_val']=tmp['lon_num'].apply(lambda x:res2_reverse[x])
tmp=tmp.astype({'lon_cate_val': 'float64'})
tmp['lon_cate_val']=tmp['lon_cate_val']%-360

print('Step5')

use_col=["checklist_id",'lat_num', 'lon_num','lat_cate_val','lon_cate_val','year','month','day_of_year','day_since_2005']
checklist=tmp[use_col]
print('Done!')

# Save lat_num and lon_num dictionary data

## Bioclimatic data

In [None]:
# File names of the 19 bioclimatic rasters (bio_1 ... bio_19).
ls_bio = ['wc2.1_10m_bio_' + str(i) + '.tif' for i in range(1, 20)]
ls_bio

In [None]:
# Sample the 19 bioclimatic rasters at every occupied grid cell and pickle the table.

# One row per distinct (lat_cate_val, lon_cate_val) pair that has at least one checklist.
test=checklist.groupby(['lat_cate_val','lon_cate_val']).size().reset_index(name='count').query('count>0').reset_index(drop=True)
display(test)

# Raster sampling takes (x, y) pairs, i.e. (longitude, latitude).
coordinates=list(zip(test['lon_cate_val'],test['lat_cate_val']))

dic={}
for en, bio in tqdm_notebook(enumerate(ls_bio), total=len(ls_bio)):
    # The context manager closes each raster once sampled; previously all 19
    # datasets were left open.
    with rio.open('/content/'+bio) as bio_raster:
        # Values below -1e9 are treated as missing and stored as NaN.
        ls_val=[np.nan if i[0]<-1000000000 else i[0]
                for i in bio_raster.sample(coordinates)]
    dic['bio'+str(en+1)]=ls_val

# Columns bio1..bio19 are appended next to the grid-cell table, row for row.
df_bioclimatic30y=pd.concat([test,pd.DataFrame(dic)],axis=1)
display(df_bioclimatic30y)

with open(os.path.join(PATH,'df_bioclimatic30y'+file_id+'.pkl'), 'wb') as handle:
    pickle.dump(df_bioclimatic30y, handle, protocol=pickle.HIGHEST_PROTOCOL)

## Monthly 30y climate data

In [None]:
# Preview: every occupied grid cell repeated once per calendar month.
cell_counts = checklist.groupby(['lat_cate_val','lon_cate_val']).size().reset_index(name='count')
test = cell_counts[cell_counts['count'] > 0].reset_index(drop=True)
display(test)

# Cross join via a constant helper key 'tttt', which is dropped afterwards.
df_month = pd.DataFrame({'month': list(range(1, 13)), 'tttt': 1})
test = pd.merge(df_month, test.assign(tttt=1), on=['tttt']).drop('tttt', axis=1)
display(test)

In [None]:
def clm30y_monthly(var, checklist):
    """Sample the 12 monthly rasters of one variable at every occupied grid cell and pickle the table.

    Parameters
    ----------
    var : str
        Variable token used in the raster names ``wc2.1_10m_<var>_<MM>.tif``
        (read from ``/content/``) and in the output column and file names.
    checklist : pandas.DataFrame
        Must contain ``lat_cate_val`` and ``lon_cate_val``; only the distinct
        pairs that occur at least once are sampled.

    Returns
    -------
    None
        The resulting table (month, lat_cate_val, lon_cate_val, count,
        ``<var>_30y_monthly``) is displayed and pickled to
        ``PATH/<var>_30y_monthly<file_id>.pkl``.
    """
    print(f'Start:{var}')
    ls=['wc2.1_10m_' + var + '_'+ str(i).zfill(2) +'.tif' for i in range(1,13)]

    test=checklist.groupby(['lat_cate_val','lon_cate_val']).size().reset_index(name='count').query('count>0').reset_index(drop=True)
    display(test)

    # Cross join with the 12 months through a constant helper key.
    test['tttt']=1
    df_month=pd.DataFrame({'month':[1,2,3,4,5,6,7,8,9,10,11,12]})
    df_month['tttt']=1
    test=pd.merge(df_month,test,on=['tttt']).drop('tttt', axis=1)
    display(test)

    # Values are collected month by month; this lines up with `test` because the
    # merge above lists all cells of month 1 first, then month 2, and so on.
    ls_val=[]
    for m, clm_data in tqdm_notebook(enumerate(ls), total=len(ls)):
        in_month=test['month']==m+1
        # Raster sampling takes (x, y) pairs, i.e. (longitude, latitude).
        coordinates=list(zip(test.loc[in_month,'lon_cate_val'],test.loc[in_month,'lat_cate_val']))
        # Close each raster after sampling instead of leaking the handle.
        with rio.open('/content/'+clm_data) as src:
            for i in src.sample(coordinates):
                if i[0]<-1000000000:  # missing cells come back as about -3.4e+38
                    ls_val.append(np.nan)
                else:
                    ls_val.append(i[0])

    dic={var+'_30y_monthly':ls_val}
    data_30y_monthly=pd.concat([test,pd.DataFrame(dic)],axis=1)
    display(data_30y_monthly)

    with open(os.path.join(PATH,var+'_30y_monthly'+file_id+'.pkl'), 'wb') as handle:
        pickle.dump(data_30y_monthly, handle, protocol=pickle.HIGHEST_PROTOCOL)
    print(f'End:{var}')

In [None]:
# Build and pickle the monthly 30-year table for each WorldClim variable in turn.
for monthly_var in ('tmin', 'tmax', 'tavg', 'prec', 'srad', 'wind', 'vapr'):
    clm30y_monthly(monthly_var, checklist)

## Precip for the short term climatic data

In [None]:
# Collect the full 2005-2019 daily precipitation series for every occupied grid
# cell, keyed by (lat index, lon index), and pickle the dictionary.
cell_counts = checklist.groupby(['lat_num','lon_num']).size().reset_index(name='count')
test = cell_counts.query('count>0').reset_index(drop=True)
display(test)

dic = {}
for lat, lon in tqdm_notebook(zip(test['lat_num'], test['lon_num'])):
    series = []
    # Yearly files are appended in list order, so the series runs chronologically.
    for nc_file in ls_precip_files:
        series.extend(nc_file.variables['precip'][:, lat, lon])
    dic[(lat, lon)] = series

prec_pickle_path = os.path.join(PATH, 'dic_prec_2005_2019'+file_id+'.pkl')
with open(prec_pickle_path, 'wb') as handle:
    pickle.dump(dic, handle, protocol=pickle.HIGHEST_PROTOCOL)

## tmax for the short term climatic data

In [None]:
# Collect the full 2005-2019 daily tmax series for every occupied grid cell,
# keyed by (lat index, lon index), and pickle the dictionary.
cell_counts = checklist.groupby(['lat_num','lon_num']).size().reset_index(name='count')
test = cell_counts.query('count>0').reset_index(drop=True)
display(test)

dic_tmax = {}
for lat, lon in tqdm_notebook(zip(test['lat_num'], test['lon_num'])):
    series = []
    # Yearly files are appended in list order, so the series runs chronologically.
    for nc_file in ls_tmax_files:
        series.extend(nc_file.variables['tmax'][:, lat, lon])
    dic_tmax[(lat, lon)] = series

import pickle
tmax_pickle_path = os.path.join(PATH, 'dic_tmax_2005_2019'+file_id+'.pkl')
with open(tmax_pickle_path, 'wb') as handle:
    pickle.dump(dic_tmax, handle, protocol=pickle.HIGHEST_PROTOCOL)

## tmin for the short term climatic data

In [None]:
# Collect the full 2005-2019 daily tmin series for every occupied grid cell,
# keyed by (lat index, lon index), and pickle the dictionary.
cell_counts = checklist.groupby(['lat_num','lon_num']).size().reset_index(name='count')
test = cell_counts.query('count>0').reset_index(drop=True)
display(test)

dic_tmin = {}
for lat, lon in tqdm_notebook(zip(test['lat_num'], test['lon_num'])):
    series = []
    # Yearly files are appended in list order, so the series runs chronologically.
    for nc_file in ls_tmin_files:
        series.extend(nc_file.variables['tmin'][:, lat, lon])
    dic_tmin[(lat, lon)] = series

import pickle
tmin_pickle_path = os.path.join(PATH, 'dic_tmin_2005_2019'+file_id+'.pkl')
with open(tmin_pickle_path, 'wb') as handle:
    pickle.dump(dic_tmin, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Load necessary dictionary data

In [None]:
def _load_pickle(stem):
    """Return the object pickled at PATH/<stem><file_id>.pkl.

    These are the files written by the extraction cells of this notebook;
    pickle must not be used on files from an untrusted source.
    """
    with open(os.path.join(PATH, stem + file_id + '.pkl'), 'rb') as handle:
        return pickle.load(handle)


# 30-year bioclimatic table and the 2005-2019 daily series dictionaries.
df_bioclimatic30y = _load_pickle('df_bioclimatic30y')
dic_prec = _load_pickle('dic_prec_2005_2019')
dic_tmax = _load_pickle('dic_tmax_2005_2019')
dic_tmin = _load_pickle('dic_tmin_2005_2019')

# Monthly 30-year tables, one per WorldClim variable.
df_tmin30y = _load_pickle('tmin_30y_monthly')
df_tmax30y = _load_pickle('tmax_30y_monthly')
df_tavg30y = _load_pickle('tavg_30y_monthly')
df_prec30y = _load_pickle('prec_30y_monthly')
df_srad30y = _load_pickle('srad_30y_monthly')
df_wind30y = _load_pickle('wind_30y_monthly')
df_vapr30y = _load_pickle('vapr_30y_monthly')

In [None]:
# (lat index, lon index) key used to look up the daily series dictionaries.
# assign() returns a new frame, so the column is not written into a frame that was
# sliced out of `tmp` (which triggers SettingWithCopyWarning), and the cell can be re-run safely.
checklist = checklist.assign(lat_lon=list(zip(checklist.lat_num, checklist.lon_num)))

# Replace NaN values with values from the closest grid cells

## For 30y monthly data

In [None]:
# Monthly 30-year tables handled together below; the climate value is the fifth column of each.
DFs = [
    df_tmin30y, df_tmax30y, df_tavg30y,
    df_prec30y, df_srad30y, df_wind30y, df_vapr30y,
]

In [None]:
# Count missing vs. present entries in the value column (fifth column) of each table.
for frame in DFs:
    value_column = frame.iloc[:, 4]
    print(value_column.isnull().value_counts())

In [None]:
# List value frequencies per table to spot sentinel values that stand in for NaN.
for frame in DFs:
    value_column = frame.iloc[:, 4]
    print(value_column.value_counts())

In [None]:
# Turn sentinel values into real NaN so the fill step below can detect them:
# -32768 in the precipitation table and 65535 in the solar-radiation table.
# NOTE(review): these look like integer nodata markers of the source rasters — confirm against the value counts above.
df_prec30y.loc[:,'prec_30y_monthly']=df_prec30y.prec_30y_monthly.replace(-32768, np.nan)
df_srad30y.loc[:,'srad_30y_monthly']=df_srad30y.srad_30y_monthly.replace(65535, np.nan)

In [None]:
import math
def Check_NaN_30ydata(DF):
    """Fill missing values in the fifth column of a monthly table from same-latitude neighbours, in place.

    For every row whose fifth column is NaN, rows with the same ``lat_cate_val``
    and ``month`` are searched at longitude offsets +0.5, -0.5, +1.0, -1.0, ...,
    +2.0, -2.0 (in that order). The first neighbour holding a non-NaN value
    supplies the replacement. Rows with no usable neighbour stay NaN.

    Parameters
    ----------
    DF : pandas.DataFrame
        Must contain ``lat_cate_val``, ``lon_cate_val`` and ``month``; the value
        to repair is taken positionally from column index 4. Modified in place.

    Returns
    -------
    None
        Progress and the number of NaN rows encountered are printed.
    """
    value_col = 4  # positional index of the climate value column
    yy = 0
    for i in range(len(DF)):
        if not pd.isna(DF.iloc[i, value_col]):
            continue
        yy += 1
        print(f"Row:{i}")
        # Positional access throughout, so the function does not depend on the
        # frame having a default 0..n-1 index.
        lat = DF['lat_cate_val'].iloc[i]
        lon = DF['lon_cate_val'].iloc[i]
        month = DF['month'].iloc[i]
        for j in [0.5,-0.5,1.0,-1.0,1.5,-1.5,2.0,-2.0]:
            check_nan = DF[(DF['lat_cate_val']==lat)&
                           (DF['lon_cate_val']==lon+j)&
                           (DF['month']==month)]
            # An absent neighbour is handled explicitly instead of relying on a
            # bare except around a Series-to-float conversion, which hid real errors.
            if check_nan.empty:
                print(f"Row:{i},Find:{j}, None!")
                continue
            neighbour_value = check_nan.iloc[0, value_col]
            if pd.isna(neighbour_value):
                continue
            print(f'replace at{j}')
            DF.iloc[i, value_col] = neighbour_value
            break
    print(f'total:{yy}')

In [None]:
# Show each monthly table, then fill its NaN values from neighbouring cells in place.
for frame in DFs:
    display(frame)
    Check_NaN_30ydata(frame)

In [None]:
# Re-count missing entries in the value column of each table after the fill step.
for frame in DFs:
    value_column = frame.iloc[:, 4]
    print(value_column.isnull().value_counts())

In [None]:
# Attach each monthly 30-year value to the checklists by grid cell and month.
# how="left" keeps every checklist row even when a table has no matching cell.
# NOTE(review): each table also carries a 'count' column, so repeated merges rely on
# pandas' _x/_y suffixing and can produce repeated 'count_x'/'count_y' names; newer
# pandas versions may reject that — verify with the installed version.
for DF in DFs:
    checklist=pd.merge(checklist, DF, on=['lat_cate_val','lon_cate_val','month'], how="left")

## For 30y annual data

In [None]:
import math
# Fill grid cells whose bioclimatic values are missing with the values of the
# nearest same-latitude cell that has data. Longitude offsets are tried in the
# order +0.5, -0.5, +1.0, -1.0, ..., +2.0, -2.0; 'bio1' is used as the indicator
# for the whole bio1.. column block.
bio_columns = list(df_bioclimatic30y.loc[:, 'bio1':])
yy = 0
for i in range(len(df_bioclimatic30y)):
    if not pd.isna(df_bioclimatic30y['bio1'].iloc[i]):
        continue
    yy += 1
    print(i)
    lat = df_bioclimatic30y['lat_cate_val'].iloc[i]
    lon = df_bioclimatic30y['lon_cate_val'].iloc[i]
    target_rows = ((df_bioclimatic30y['lat_cate_val'] == lat) &
                   (df_bioclimatic30y['lon_cate_val'] == lon))
    for j in [0.5,-0.5,1.0,-1.0,1.5,-1.5,2.0,-2.0]:
        check_nan = df_bioclimatic30y[(df_bioclimatic30y['lat_cate_val'] == lat) &
                                      (df_bioclimatic30y['lon_cate_val'] == lon + j)]
        # A missing neighbour is handled explicitly rather than through a bare
        # except around a Series-to-float conversion, which also swallowed real errors.
        if check_nan.empty:
            print(f"{j} None!")
            continue
        if pd.isna(check_nan['bio1'].iloc[0]):
            continue
        print(j)
        display(check_nan.loc[:, 'bio1':])
        for bio_name in bio_columns:
            df_bioclimatic30y.loc[target_rows, bio_name] = check_nan[bio_name].iloc[0]
        break
print(f'total:{yy}')

In [None]:
# Attach the bioclimatic (30-year annual) columns by grid cell.
# Original author's note: an "outer" join is believed to be fine here.
checklist=pd.merge(checklist, df_bioclimatic30y, on=['lat_cate_val','lon_cate_val'], how="outer")

In [None]:
# Remove the per-cell 'count' helper columns brought in by the merges.
# errors='ignore' plus rebinding (instead of inplace) makes the cell safe to re-run:
# a second execution no longer raises KeyError for columns that are already gone.
checklist = checklist.drop(columns=['count_x','count_y','count'], errors='ignore')

## For the short term climatic data

In [None]:
# Check masked data ----> cells listed here need their coordinate remapped.
# The tmin series of every grid cell is probed at two positions (index 0 and
# index 2000); each masked key is printed, followed by the number of masked keys.
for probe_index in (0, 2000):
    j = 0
    for k in dic_tmin.keys():
        if dic_tmin[k][probe_index] is np.ma.masked:
            j += 1
            print(k)
    print(j)

In [None]:
# Remap grid cells whose short-term climate series are masked to a neighbouring
# cell that has data. One mapping per BCR; the one matching `bc_id` is selected
# automatically. Previously both mappings were assigned to the same name, so the
# BCR27 table was always overwritten by the BCR32 table.
MASKED_CELL_REMAP = {
    # For BCR27
    27: {(104, 567):(104, 566),
         (105, 567):(105, 566),
         (106, 568):(106, 567),
         (107, 568):(107, 567),
         (108, 569):(108, 568),
         (109, 567):(109, 566),
         (109, 568):(109, 566),
         (109, 569):(109, 566),
         (110, 567):(110, 566),
         (111, 565):(111, 564),
         (112, 563):(112, 562),
         (112, 564):(112, 562),
         (114, 560):(114, 559),
         (115, 559):(115, 558),
         (116, 558):(116, 557),
         (117, 557):(117, 556),
         (118, 557):(118, 556),
         (119, 542):(119, 541),
         (119, 543):(119, 544),
         (119, 545):(119, 544),
         (119, 546):(119, 548),
         (119, 547):(119, 548),
         (119, 557):(119, 556),
         (120, 551):(120, 550),
         (120, 552):(120, 553),
         (121, 553):(121, 554)},
    # For BCR32
    32: {(117, 486):(117, 487),
         (116, 486):(116, 487),
         (115, 485):(115, 486),
         (114, 482):(114, 485),
         (114, 483):(114, 485),
         (113, 480):(113, 485),
         (113, 481):(113, 485),
         (113, 482):(113, 485),
         (113, 483):(113, 485),
         (113, 484):(113, 485),
         (112, 479):(112, 483),
         (112, 480):(112, 483),
         (111, 479):(111, 481),
         (111, 480):(111, 481),
         (110, 478):(110, 479),
         (109, 477):(109, 478),
         (106, 475):(106, 476),
         (104, 473):(104, 475),
         (104, 474):(104, 475),
         (103, 473):(103, 474)},
}

# A region without an entry gets an empty mapping, i.e. every key is kept as is.
map_lat_lon = MASKED_CELL_REMAP.get(bc_id, {})

# dict.get with the key as default avoids rebuilding a set of keys for every row.
checklist['lat_lon_modi']=checklist['lat_lon'].apply(lambda x: map_lat_lon.get(x, x))

# Calculate the short term climatic data for our purpose

In [None]:
# Fallback for regions with no missing short-term climate data: use the raw grid
# key unchanged. The guard keeps a Restart & Run All from overwriting a
# 'lat_lon_modi' column that was already built from a remapping table.
if 'lat_lon_modi' not in checklist.columns:
    checklist['lat_lon_modi']=checklist['lat_lon']

In [None]:
def cal_prec(x):
    """Summarise daily precipitation over trailing windows that end on the observation day.

    Parameters
    ----------
    x : mapping-like row
        Must provide ``'lat_lon_modi'`` (key into the module-level ``dic_prec``)
        and ``'day_since_2005'`` (integer used as the exclusive end of the slice).

    Returns
    -------
    tuple
        14 values: (mean, std) for the trailing 30, 180, 365, 730, 1095, 1460
        and 1825 entries, in that order, computed with the NaN-ignoring numpy
        functions. A window longer than ``day_since_2005`` cannot be filled and
        yields (nan, nan).
    """
    series = dic_prec[x['lat_lon_modi']]
    end = x['day_since_2005']
    # Clamp the start at 0: a negative start would be read as an offset from the
    # END of the series and silently select an empty or unrelated window.
    start = max(end - 1825, 0)
    ls = series[start:end]

    stats = []
    for days in (30, 180, 365, 730, 1095, 1460, 1825):
        if end < days:
            stats.extend((np.nan, np.nan))
            continue
        window = ls[-days:]
        stats.append(np.nanmean(window))
        stats.append(np.nanstd(window))
    return tuple(stats)

# Column labels follow the order of the tuple returned by cal_prec:
# mean then std for each window length.
col_names = [f'prec{days}_{stat}'
             for days in (30, 180, 365, 730, 1095, 1460, 1825)
             for stat in ('mean', 'std')]

# One tuple of statistics per checklist row, expanded into columns and appended.
df_tocopy = pd.DataFrame(
    checklist.swifter.apply(cal_prec, axis=1).tolist(),
    columns=col_names,
)
checklist = pd.concat([checklist, df_tocopy], axis=1)

In [None]:
def cal_tmp(x):
    """Summarise the daily mean of tmax and tmin over trailing windows that end on the observation day.

    Parameters
    ----------
    x : mapping-like row
        Must provide ``'lat_lon_modi'`` (key into the module-level ``dic_tmax``
        and ``dic_tmin``) and ``'day_since_2005'`` (integer used as the
        exclusive end of the slice).

    Returns
    -------
    tuple
        14 values: (mean, std) of ``(tmax + tmin) / 2`` for the trailing 30,
        180, 365, 730, 1095, 1460 and 1825 entries, in that order, computed with
        the NaN-ignoring numpy functions. A window longer than
        ``day_since_2005`` cannot be filled and yields (nan, nan).
    """
    key = x['lat_lon_modi']
    tmax_series = dic_tmax[key]
    tmin_series = dic_tmin[key]
    end = x['day_since_2005']
    # Clamp the start at 0: a negative start would be read as an offset from the
    # END of the series and silently select an empty or unrelated window.
    start = max(end - 1825, 0)
    # Distinct loop names so the row parameter `x` is not shadowed.
    ls = [(hi + lo)/2 for hi, lo in zip(tmax_series[start:end], tmin_series[start:end])]

    stats = []
    for days in (30, 180, 365, 730, 1095, 1460, 1825):
        if end < days:
            stats.extend((np.nan, np.nan))
            continue
        window = ls[-days:]
        stats.append(np.nanmean(window))
        stats.append(np.nanstd(window))
    return tuple(stats)

# Column labels follow the order of the tuple returned by cal_tmp:
# mean then std for each window length.
col_names = [f'tmp{days}_{stat}'
             for days in (30, 180, 365, 730, 1095, 1460, 1825)
             for stat in ('mean', 'std')]

# One tuple of statistics per checklist row, expanded into columns and appended.
df_tocopy = pd.DataFrame(
    checklist.swifter.apply(cal_tmp, axis=1).tolist(),
    columns=col_names,
)
checklist = pd.concat([checklist, df_tocopy], axis=1)

In [None]:
def cal_tmax(x):
    """Summarise daily tmax over trailing windows that end on the observation day.

    Parameters
    ----------
    x : mapping-like row
        Must provide ``'lat_lon_modi'`` (key into the module-level ``dic_tmax``)
        and ``'day_since_2005'`` (integer used as the exclusive end of the slice).

    Returns
    -------
    tuple
        14 values: (mean, std) for the trailing 30, 180, 365, 730, 1095, 1460
        and 1825 entries, in that order, computed with the NaN-ignoring numpy
        functions. A window longer than ``day_since_2005`` cannot be filled and
        yields (nan, nan).
    """
    series = dic_tmax[x['lat_lon_modi']]
    end = x['day_since_2005']
    # Clamp the start at 0: a negative start would be read as an offset from the
    # END of the series and silently select an empty or unrelated window.
    start = max(end - 1825, 0)
    ls = series[start:end]

    stats = []
    for days in (30, 180, 365, 730, 1095, 1460, 1825):
        if end < days:
            stats.extend((np.nan, np.nan))
            continue
        window = ls[-days:]
        stats.append(np.nanmean(window))
        stats.append(np.nanstd(window))
    return tuple(stats)

# Column labels follow the order of the tuple returned by cal_tmax:
# mean then std for each window length.
col_names = [f'tmax{days}_{stat}'
             for days in (30, 180, 365, 730, 1095, 1460, 1825)
             for stat in ('mean', 'std')]

# One tuple of statistics per checklist row, expanded into columns and appended.
df_tocopy = pd.DataFrame(
    checklist.swifter.apply(cal_tmax, axis=1).tolist(),
    columns=col_names,
)
checklist = pd.concat([checklist, df_tocopy], axis=1)

In [None]:
def cal_tmin(x):
    """Summarise daily tmin over trailing windows that end on the observation day.

    Parameters
    ----------
    x : mapping-like row
        Must provide ``'lat_lon_modi'`` (key into the module-level ``dic_tmin``)
        and ``'day_since_2005'`` (integer used as the exclusive end of the slice).

    Returns
    -------
    tuple
        14 values: (mean, std) for the trailing 30, 180, 365, 730, 1095, 1460
        and 1825 entries, in that order, computed with the NaN-ignoring numpy
        functions. A window longer than ``day_since_2005`` cannot be filled and
        yields (nan, nan).
    """
    series = dic_tmin[x['lat_lon_modi']]
    end = x['day_since_2005']
    # Clamp the start at 0: a negative start would be read as an offset from the
    # END of the series and silently select an empty or unrelated window.
    start = max(end - 1825, 0)
    ls = series[start:end]

    stats = []
    for days in (30, 180, 365, 730, 1095, 1460, 1825):
        if end < days:
            stats.extend((np.nan, np.nan))
            continue
        window = ls[-days:]
        stats.append(np.nanmean(window))
        stats.append(np.nanstd(window))
    return tuple(stats)

# Column labels follow the order of the tuple returned by cal_tmin:
# mean then std for each window length.
col_names = [f'tmin{days}_{stat}'
             for days in (30, 180, 365, 730, 1095, 1460, 1825)
             for stat in ('mean', 'std')]

# One tuple of statistics per checklist row, expanded into columns and appended.
df_tocopy = pd.DataFrame(
    checklist.swifter.apply(cal_tmin, axis=1).tolist(),
    columns=col_names,
)
checklist = pd.concat([checklist, df_tocopy], axis=1)

In [None]:
# Coefficient of variation (std / mean) for every window length of each listed variable.
var_names = ['prec']
var_nums = ['30', '180', '365', '730', '1095', '1460', '1825']

for var_name in tqdm_notebook(var_names):
    for var_num in var_nums:
        prefix = var_name + var_num
        checklist[f'{prefix}_cv'] = checklist[f'{prefix}_std'] / checklist[f'{prefix}_mean']

# Merge dataframe and export the climatic variables of our study

In [None]:
def MergeDF(tmp,checklist):
    """Join the eBird table with the climate table, drop helper columns, and export to CSV.

    The two frames are merged on all columns they share (pandas' default).
    The result is written to ``PATH/<file_id>_with_prep_tmp_clm.csv`` without
    the index, displayed, and returned.

    Parameters
    ----------
    tmp : pandas.DataFrame
        eBird records including the helper columns created during grid snapping.
    checklist : pandas.DataFrame
        Per-checklist climate variables.

    Returns
    -------
    pandas.DataFrame
        The merged frame without the helper columns.
    """
    print(f'Original DF length:{len(tmp)}')
    print(f'Climatic DF lenght:{len(checklist)}')

    # Intermediate columns that are not wanted in the exported file.
    helper_columns = ['year_month', 'day', 'longitude360', 'lat_num','lat_cate',
                      'lon_num', 'lon_cate','day_since_2009','day_since_2007']
    combined = pd.merge(tmp, checklist).drop(columns=helper_columns)

    print(f'Combined DF length:{len(combined)}')
    csv_path = os.path.join(PATH, file_id+'_with_prep_tmp_clm.csv')
    combined.to_csv(csv_path, index=False)
    display(combined)
    return combined

# Build the final table; this also writes the CSV export under PATH.
combined=MergeDF(tmp,checklist)

In [None]:
# List the columns of the final table that still contain at least one missing value.
pd.set_option('display.max_rows', 130)
has_null = combined.isnull().any()
has_null[has_null == True]