In [14]:
import statsmodels.api as sm
from statsmodels.tools.eval_measures import rmse
from aquacrop.utils import prepare_weather, get_filepath
from aquacrop import AquaCropModel, Soil, Crop, InitialWaterContent, IrrigationManagement
#from aquacrop.entities import IrrigationManagement
from os import chdir, getcwd
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import datetime
import csv
import pickle 
#from dfply import *

from os import chdir, getcwd
import os 
import datetime
import shapefile as shp
import pandas as pd
import geopandas as gpd
from shapely import geometry
import math
from os import listdir
from os.path import isfile, join
import glob

In [None]:
# Switch to the project home directory; `wd` is used below as the prefix for
# every data folder path.
# NOTE(review): this is a hard-coded absolute path, so the notebook only runs
# as-is on the machine/account that has this directory.
os.chdir('/home/jupyter-wndlovu/')
wd = os.getcwd()

In [349]:
# Helper functions for reading the county input files and splitting them into groups
def fileInput(filepath):
    """Read every .csv file directly inside a folder into a list of DataFrames.

    Parameters
    ----------
    filepath : str or path-like
        Folder that holds the input .csv files. Sub-folders are not searched.

    Returns
    -------
    list of pandas.DataFrame
        One DataFrame per .csv file, ordered by file name. The list is empty
        when the folder contains no .csv files.

    Raises
    ------
    FileNotFoundError, NotADirectoryError
        Propagated from ``listdir`` when ``filepath`` is not an existing folder.
    """
    # listdir() gives entries in arbitrary order, so sort the names to make the
    # result reproducible from run to run. Only regular files with a .csv
    # extension (any letter case) are read; anything else in the folder is
    # skipped instead of being fed to read_csv.
    csv_names = sorted(
        name
        for name in listdir(filepath)
        if isfile(join(filepath, name)) and name.lower().endswith('.csv')
    )

    return [pd.read_csv(join(filepath, name)) for name in csv_names]


def fileList(dataframe):
    """Split a table into one DataFrame per 'crop_mn_codeyear' value.

    Used for the gridmet, soils and canopy cover tables. 'crop_mn_codeyear'
    must be available to ``groupby`` on ``dataframe``. The pieces are returned
    as independent copies, in the order ``groupby`` yields them (sorted by
    group key with pandas' default settings).
    """
    return [
        group_rows.copy()
        for _, group_rows in dataframe.groupby('crop_mn_codeyear')
    ]


# Input data

In [43]:
# Read each input folder into its own list of DataFrames (one DataFrame per file).
# The folders are read in the order: gridMET weather, leaf area index, soils.
gridmet_list, lai_list, soils_list = (
    fileInput(wd + folder)
    for folder in (
        '/eggs/gmd4_gridMET',
        '/eggs/leaf_area_index',
        '/eggs/gmd4_soils_county',
    )
)

# GridMET wrangling

In [74]:
# Stack the per-file gridMET tables, convert temperatures and dates, rename the
# variables and keep only the columns used further on.
# NOTE(review): the maximum-temperature column ends up as 'Maxtemp' (lower-case
# "t") while the minimum is 'MinTemp' — confirm that whatever consumes this
# frame expects that spelling.
gridmet_df = (
    pd.concat(gridmet_list, ignore_index=True)
    .assign(
        # subtract 273.15 to convert the temperatures to Celsius
        tmmn=lambda met: met.tmmn - 273.15,
        tmmx=lambda met: met.tmmx - 273.15,
        # dates arrive as YYYYMMDD values
        date_ymd=lambda met: pd.to_datetime(met['date_ymd'], format='%Y%m%d'),
    )
    .rename(columns={
        'tmmn': 'MinTemp',
        'tmmx': 'Maxtemp',
        'pr': 'Precipitation',
        'eto': 'ReferenceET',
        'date_ymd': 'Date',
    })
    [[
        'crop_mn_codeyear',
        'MinTemp',
        'Maxtemp',
        'Precipitation',
        'ReferenceET',
        'Date',
    ]]
)

In [356]:
# Split the weather table into one DataFrame per 'crop_mn_codeyear' value
gridmet_county = fileList(gridmet_df)
len(gridmet_county)  # number of groups produced

68

# Leaf Area Index Wrangling

In [87]:
# Stack the per-file leaf area index tables, derive canopy cover and parse the
# date, then keep only the columns used further on.
lai_df = (
    pd.concat(lai_list, ignore_index=True)
    .assign(
        # canopy cover computed from LAI, after Hsiao et al. (2009)
        cc=lambda lai: 100.5 * (1 - np.exp(-0.6 * lai['Lai'])) ** 1.2,
        # dates arrive as YYYYMMDD values
        date=lambda lai: pd.to_datetime(lai['date_ymd'], format='%Y%m%d'),
    )
    [['crop_mn_codeyear', 'date', 'Lai', 'cc']]
)

In [358]:
# Split the LAI / canopy cover table into one DataFrame per 'crop_mn_codeyear' value
lai_county = fileList(lai_df) 
len(lai_county)  # number of groups produced

68

# Soils wrangling

In [339]:
# Stack the per-file soils tables (original row labels are kept, so they repeat
# across files) and keep the id, year, layer name and mean value columns.
soils_df = pd.concat(soils_list)[
    ['crop_mn_codeyear', 'Year', 'system:index', 'mean']
]

In [342]:
# Decode the 'system:index' label into a soil parameter name and a depth range.
soils_df = (
    soils_df
    .assign(
        # strip everything in front of the first lower-case letter
        variable=lambda soil: soil['system:index'].str.replace(r'^.*?(?=[a-z])', '', regex=True),
        # drop the trailing 21 characters
        # NOTE(review): assumes every label ends with a fixed-length 21-character suffix — confirm
        soil_var=lambda soil: soil['variable'].str[:-21],
        # soil parameter: the text left of the last two '_'-separated pieces
        var=lambda soil: soil['soil_var'].str.rsplit('_', n=2).str[0],
        # soil depth: first "<digits>_<digits>" match, e.g. 0_5
        depth=lambda soil: soil['soil_var'].str.extract(r'(\d+_\d+)'),
    )
    [['crop_mn_codeyear', 'Year', 'depth', 'var', 'mean']]
)

In [348]:
# Reshape to wide form (intended to match the AquaCrop input layout): one row
# per (crop_mn_codeyear, Year, depth) and one column per soil parameter in
# 'var', filled from 'mean'. pivot_table's default aggregation averages any
# duplicate entries.
soils_df_pivot = pd.pivot_table(
    soils_df,
    values="mean",
    index=['crop_mn_codeyear', 'Year', 'depth'],
    columns="var",
)

# Display only: the flattened frame is not assigned, so soils_df_pivot keeps
# its three-level row index.
soils_df_pivot.reset_index()

var,crop_mn_codeyear,Year,depth,alpha,clay,hb,ksat,lambda,n,om,sand,silt,theta_r,theta_s
0,1_Decatur,2006.0,0_5,-0.415029,21.677830,0.416280,0.199379,0.298007,1.313759,0.193569,16.906940,58.423781,0.058815,0.499963
1,1_Decatur,2006.0,100_200,-0.417431,17.985571,0.416632,0.242347,0.315498,1.336323,-0.757719,19.357758,59.331870,0.054213,0.488484
2,1_Decatur,2006.0,15_30,-0.428745,25.289850,0.435674,0.125315,0.285533,1.295082,0.020264,15.133924,55.992885,0.064739,0.501657
3,1_Decatur,2006.0,30_60,-0.451755,25.984820,0.454456,0.037422,0.284164,1.293146,-0.216263,15.122233,55.539237,0.068000,0.499710
4,1_Decatur,2006.0,5_15,-0.422424,22.549341,0.426578,0.167593,0.294614,1.308546,0.153332,16.368427,57.726577,0.060534,0.495986
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5977,8_Wallace,2020.0,100_200,-0.279699,18.787997,0.271937,0.243686,0.332640,1.362163,-0.673430,28.874842,47.304358,0.053232,0.498738
5978,8_Wallace,2020.0,15_30,-0.320532,23.413299,0.321215,0.116561,0.306804,1.326308,-0.046521,23.304955,49.574708,0.061371,0.506419
5979,8_Wallace,2020.0,30_60,-0.345823,24.022649,0.349264,0.114771,0.303801,1.320484,-0.251860,21.579254,50.558681,0.063073,0.506955
5980,8_Wallace,2020.0,5_15,-0.303227,20.838549,0.305431,0.161349,0.320397,1.343348,0.071706,25.823096,49.789387,0.058402,0.508678


In [None]:
# Split the pivoted soils table into one DataFrame per 'crop_mn_codeyear' value.
# 'crop_mn_codeyear' is a row-index level of soils_df_pivot (not a column), so
# the grouping inside fileList is done on that index level.
soils_county = fileList(soils_df_pivot) 
len(soils_county)  # number of groups produced

# Create a dictionary with the dataframes for each county, management and crop combination

In [396]:
# All distinct 'crop_mn_codeyear' ids present in the weather table,
# in order of first appearance (not sorted)
county_comb = gridmet_df['crop_mn_codeyear'].unique() # get all unique ids
county_comb

array(['1_Rawlins', '1_Decatur', '1_Gove', '1_Thomas', '1_Sheridan',
       '1_Graham', '1_Logan', '2_Cheyenne', '2_Rawlins', '2_Decatur',
       '2_Sherman', '2_Gove', '2_Thomas', '2_Sheridan', '2_Graham',
       '2_Wallace', '2_Logan', '3_Rawlins', '3_Decatur', '3_Gove',
       '3_Thomas', '3_Sheridan', '3_Graham', '3_Logan', '4_Cheyenne',
       '4_Rawlins', '4_Decatur', '4_Sherman', '4_Gove', '4_Thomas',
       '4_Sheridan', '4_Graham', '4_Wallace', '4_Logan', '5_Rawlins',
       '5_Gove', '5_Thomas', '5_Sheridan', '5_Graham', '5_Logan',
       '6_Cheyenne', '6_Rawlins', '6_Decatur', '6_Sherman', '6_Gove',
       '6_Thomas', '6_Sheridan', '6_Graham', '6_Wallace', '6_Logan',
       '7_Rawlins', '7_Decatur', '7_Gove', '7_Thomas', '7_Sheridan',
       '7_Graham', '7_Logan', '8_Cheyenne', '8_Rawlins', '8_Decatur',
       '8_Sherman', '8_Gove', '8_Thomas', '8_Sheridan', '8_Graham',
       '8_Wallace', '8_Logan', '5_Decatur'], dtype=object)

In [421]:
def _combination_id(frame):
    """Return the single 'crop_mn_codeyear' value carried by a per-group frame.

    The id is read from the 'crop_mn_codeyear' column when the frame has one,
    otherwise from the row-index level of that name (the pivoted soils frames
    keep the id in their index).

    Raises
    ------
    KeyError
        If the frame has neither a column nor an index level with that name.
    ValueError
        If the frame does not hold exactly one distinct id.
    """
    if 'crop_mn_codeyear' in frame.columns:
        ids = frame['crop_mn_codeyear'].unique()
    elif 'crop_mn_codeyear' in frame.index.names:
        ids = frame.index.get_level_values('crop_mn_codeyear').unique()
    else:
        raise KeyError("frame has no 'crop_mn_codeyear' column or index level")

    if len(ids) != 1:
        raise ValueError(
            f"expected exactly one crop_mn_codeyear per frame, found {len(ids)}"
        )
    return ids[0]


# Index the LAI and soils frames by their id so that each weather frame is
# paired with the LAI and soils data of the SAME county/crop/management id.
# Pairing by list position alone would silently mix ids whenever the three
# inputs do not contain an identical set of ids.
lai_by_id = {_combination_id(frame): frame for frame in lai_county}
soils_by_id = {_combination_id(frame): frame for frame in soils_county}

input_dict = {}
comb_len = len(county_comb)

# Build {key: (gridmet frame, lai frame, soils frame)} for every weather group
for gridmet_frame in gridmet_county:
    combination = _combination_id(gridmet_frame)

    missing_inputs = [
        label
        for label, lookup in (('lai', lai_by_id), ('soils', soils_by_id))
        if combination not in lookup
    ]
    if missing_inputs:
        raise KeyError(
            f"no {' / '.join(missing_inputs)} data found for crop_mn_codeyear "
            f"{combination!r}"
        )

    # The key is the string form of the one-element unique() array,
    # e.g. "['1_Decatur']" (brackets and quotes included). It is kept in this
    # form so existing readers of the pickle keep working.
    # NOTE(review): confirm whether the plain id would be the better key.
    key = str(gridmet_frame['crop_mn_codeyear'].unique())
    input_dict[key] = (
        gridmet_frame,
        lai_by_id[combination],
        soils_by_id[combination],
    )

In [None]:
#print(input_dict.keys())

In [427]:
# Save input_dict to disk as a pickle file (binary write mode).
# open() does not create folders, so <wd>/eggs/data must already exist.
with open(wd + '/eggs/data/input_dict.pickle', 'wb') as input_data: # county / crop / management inputs
    pickle.dump(input_dict, input_data) 

# Next Step - Calibration