In [1]:
## API token and base URL
# The AgroDataCube token is a personal credential and must not be stored in the
# notebook. It is read from the AGRODATACUBE_TOKEN environment variable instead.
# NOTE(review): the token that used to be hard-coded here has been exposed and
# should be treated as compromised (revoke / re-issue it).
import os
import warnings

token = os.environ.get("AGRODATACUBE_TOKEN", "")
if not token:
    warnings.warn(
        "AGRODATACUBE_TOKEN is not set; AgroDataCube requests will be sent without a valid token.",
        RuntimeWarning,
    )
base_Url = "https://agrodatacube.wur.nl/api/v1/rest"
# Paging parameters sent with every request; get_table adds the table-specific ones.
base_params = {"page_size":"1000","page_offset":"0"}
# Headers sent with every request: JSON responses, authenticated by the token above.
base_headers = {'Accept': "application/json", 'token': token}

import requests
import urllib3
import pandas as pd
import os
# Silence urllib3's InsecureRequestWarning, which would otherwise be emitted for
# every request made with verify=False (as get_table does).
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Current working directory when this cell runs; the "\Data\..." CSV paths used
# by the later cells are built from it.
directory = os.getcwd()

# Helper Functions

In [2]:
#Function to extract tables from AgroDataCube
def get_table(add_url, tab_specific_params):
    """Query an AgroDataCube endpoint and return its features as a flat DataFrame.

    Parameters
    ----------
    add_url : str
        Path appended to ``base_Url`` to select the table, e.g. ``"/fields"``.
    tab_specific_params : dict
        Query parameters for this call. They are merged over ``base_params``
        for this request only; ``base_params`` itself is left unchanged.

    Returns
    -------
    pandas.DataFrame or None
        The ``features`` entries of the response, normalized into columns.
        ``None`` when building the table raises ``ValueError``; in that case a
        message is printed unless ``add_url`` is ``"/ahn"``.
    """
    # Work on a per-call copy: updating base_params in place would carry one
    # call's parameters (e.g. "year", "cropcode") over into every later request.
    params = dict(base_params)
    params.update(tab_specific_params)
    # NOTE(review): verify=False turns off TLS certificate verification --
    # confirm this is still needed for the API host.
    req = requests.request("GET", base_Url + add_url, headers=base_headers, params=params, verify = False)
    # Note: the request takes a significant amount of time when it returns empty results.
    try:
        #Read the text into pandas data frame
        table = pd.read_json(req.text)
        return pd.io.json.json_normalize(table.features) #return normalized data frame
    except ValueError:
        # "/ahn" misses are not reported -- presumably because that endpoint is
        # queried once per field and the output would be flooded.
        if add_url != "/ahn":
            print("The parameters return empty results: ", tab_specific_params)
        return None

In [3]:
#Function that converts the geometry coordinates to strings used for the API request
def query_string_maker(geometry_coordinates):
    """Turn a field's nested coordinate list into the point list for the AHN query.

    Parameters
    ----------
    geometry_coordinates: The geometry.coordinates column value (a nested list,
        or its string representation)

    Returns
    -------
    String in which the two numbers of each coordinate are joined by a space and
    consecutive coordinates are joined by a comma. Whitespace that followed the
    commas in the input is kept as it is.

    Example
    -------
    query_string_maker('[[[[6.63, 52.78], [6.64, 52.79], [6.63, 52.78]]]]'):
        '6.63  52.78, 6.64  52.79, 6.63  52.78'
    """
    # Strip every square bracket so only a comma-separated run of numbers is left
    without_brackets = str(geometry_coordinates).translate({ord("["): None, ord("]"): None})
    numbers = without_brackets.split(",")
    # Walk the numbers two at a time and glue each pair together with a space
    points = []
    for first_of_pair in range(0, len(numbers), 2):
        points.append(numbers[first_of_pair] + " " + numbers[first_of_pair + 1])
    return ",".join(points)

In [4]:
#Function that checks if the initial coordinates are the same as the last coordinate verifying if the polygon is correct
def check_coordinates(coordinate_string):
    """Check that a polygon's point list ends on the point it starts with.

    Parameters
    ----------
    coordinate_string: The new coordinates string derived from query_string_maker
        function (points separated by commas, numbers within a point by spaces)

    Returns
    -------
    True if the string holds at least two points and the first and last point
    are made of the same numbers (as text); False otherwise, including for an
    empty string or a single point.
    """
    # Split into points and then into numbers, so the comparison does not depend
    # on whether a comma is followed by a space or how many spaces sit in a pair.
    points = [point.split() for point in coordinate_string.split(",")]
    if len(points) < 2:
        # Nothing to compare: an empty string or a lone point is not a closed ring.
        return False
    # bool(points[0]) rejects the case where the first and last points are both empty.
    return bool(points[0]) and points[0] == points[-1]

# 1) Extract Crop Codes

In [5]:
# Extract crop codes
add_url = "/codes/cropcodes"

# No table-specific parameters are passed for this endpoint
crop_codes_table = get_table(add_url, tab_specific_params={})
# Preview the first rows of the crop code table
crop_codes_table.head()

Unnamed: 0,properties.cropcode,properties.cropid,properties.cropname,properties.grondgebruik,type
0,2645,2,Notenbomen,Bouwland,Feature
1,1908,3,"Braak, zwarte- met ontheffing",Braakland,Feature
2,3718,4,"Grasland, natuurlijk",Grasland,Feature
3,859,5,"Aardappelen, zetmeel geleverd aan buitenland",Bouwland,Feature
4,3792,6,"Aardappelen, consumptie op zand/veengrond",Bouwland,Feature


In [6]:
#Keep the crops whose name matches "Aardappelen", "Biet" or "Ui" (potatoes, beets, onions)
#and leave out every name containing "voeder" (the fodder varieties)
filtered_crops = crop_codes_table.loc[
    lambda codes: codes["properties.cropname"].str.contains("Aardappelen|Biet|Ui")
    & ~codes['properties.cropname'].str.contains("voeder")
]

# 2) Extract Fields

In [None]:
add_url = "/fields"
fields_col_names = ['features','geometry.coordinates', 'geometry.type', 'properties.area',
       'properties.crop_code', 'properties.crop_name', 'properties.fieldid',
       'properties.perimeter', 'properties.year', 'type']
#get_table sends base_headers, so this dict is not used by the queries below; it is
#still defined (once, instead of on every iteration) in case a later cell refers to it
headers = {'Accept': 'application/json;charset=utf-8', 'token': token}

#Collect one frame per (crop code, year) query and concatenate once after the loop:
#growing a DataFrame with .append inside the loop copies all rows on every iteration
#and DataFrame.append no longer exists in pandas 2.x.
#The leading empty frame keeps the expected columns present even if every query is empty.
field_frames = [pd.DataFrame(columns=fields_col_names)]
#Loop query over the selected crops above
for values in filtered_crops['properties.cropcode']:
    for years in range(2012, 2019):
        querystring = {"output_epsg":"4326", "year":str(years), "cropcode": values}

        #Get the query; get_table returns None when no table could be built
        crop_year_fields = get_table(add_url, tab_specific_params=querystring)
        if crop_year_fields is not None:
            field_frames.append(crop_year_fields)

#Combine all query results into the main data frame
fields_data = pd.concat(field_frames, sort=True, ignore_index=True)

#Remove irrelevant columns
fields = fields_data.drop(["features", "type"], axis=1)

#Extract new coordinates from coordinates for sending it to API
fields["new_coordinates"] = fields['geometry.coordinates'].apply(query_string_maker)

#Column checking if the first coordinates in the polygon is the same as the last one
#(cast to bool so the column can be negated and used as a mask even when it is empty)
fields["coordinate_check"] = fields.new_coordinates.apply(check_coordinates).astype(bool)

#Extract wrong field information
fields[~fields.coordinate_check].to_csv(directory + "\\Data\\problematic fields.csv")

#The fields table should have only correct polygons
fields = fields[fields.coordinate_check]
#Set the fieldid as the index for the table
fields = fields.set_index("properties.fieldid")

In [None]:
#Make fieldid the index unless it already is: calling set_index again once the
#column has become the index raises KeyError, which breaks a top-to-bottom run
if fields.index.name != "properties.fieldid":
    fields = fields.set_index("properties.fieldid")

In [None]:
#Write the fields table to csv so that the query does not have to be run again
fields.to_csv(directory + "\\Data\\fields.csv")

# 3) Extract AHN for each field

In [None]:
#Load the fields table back from the csv file, using properties.fieldid as the index
fields= pd.read_csv(directory + "\\Data\\fields.csv", index_col=["properties.fieldid"])

In [None]:
#Information about every field cannot be extracted in one go, so the field rows are
#divided into batches that are run separately and combined afterwards
add_url = "/ahn"
#Columns used to create the empty AHN data frames
ahn_col_names = ['properties.area', 'properties.max', 'properties.mean', 'properties.min']
#Empty starting table that the results of the first batch are appended to
initial_table = pd.DataFrame(columns=ahn_col_names)# index=['properties.fieldid'])
#Number of rows in fields; used as the end row of the last batch
run_until = len(fields)

def get_ahn_in_batches(fields_table, table_to_append, initial_row_num, end_row_num, endpoint="/ahn"):
    """Query the AHN table for a range of field rows and add the results to a table.

    Parameters
    ----------
    fields_table: Data frame indexed by field id with a new_coordinates column
    table_to_append: Data frame holding the results of earlier batches
    initial_row_num: Position of the first row of fields_table to query
    end_row_num: Position one past the last row to query; values beyond the end
        of fields_table are cut back to its length
    endpoint: Path passed to get_table (defaults to "/ahn")

    Returns
    -------
    New data frame with the rows of table_to_append followed by the rows fetched
    for this batch, indexed by field id; table_to_append itself is not modified
    """
    # Cut the range back to the table length so an oversized batch processes the
    # remaining rows instead of failing with IndexError part-way through.
    last_row_num = min(end_row_num, len(fields_table))

    # One frame per successful query, concatenated once after the loop. The leading
    # empty frame keeps the AHN columns present when no query returns data.
    batch_frames = [pd.DataFrame(columns=ahn_col_names)]

    for row_num in range(initial_row_num, last_row_num):
        #Extract the coordinates from the fields table
        queries = fields_table.new_coordinates.iloc[row_num]
        querystring = {"geometry":"POLYGON((" + queries + "))","epsg":"4326"}

        #Get the query
        ahn = get_table(endpoint, querystring)
        if ahn is None: #No data acquired from the query
            continue

        # Assign the field id as a scalar so every returned row is labelled; a
        # one-element Series would only align with the row at index 0.
        ahn["properties.fieldid"] = fields_table.index[row_num]
        batch_frames.append(ahn.set_index('properties.fieldid'))

    ahn_data = pd.concat(batch_frames, sort=True)
    # "type" only exists when at least one query returned data
    ahn_data = ahn_data.drop("type", axis = 1, errors="ignore")
    return pd.concat([table_to_append, ahn_data], sort=True)

In [None]:
#Batch 1: field rows 0-4999 (by position), added to the empty initial_table
table1 = get_ahn_in_batches(fields,initial_table,0,5000)

In [None]:
#Save the results of batch 1
table1.to_csv(directory + "\\Data\\table1.csv")

In [None]:
#Batch 2: field rows 5000-9999 (by position), added to the rows already in table1
table2 = get_ahn_in_batches(fields,table1,5000,10000)

In [None]:
#Save the accumulated results of batches 1-2
table2.to_csv(directory + "\\Data\\table2.csv")

In [None]:
#Batch 3: field rows 10000-19999 (by position), added to the rows already in table2
table3 = get_ahn_in_batches(fields,table2,10000,20000)

In [None]:
#Save the accumulated results of batches 1-3
table3.to_csv(directory + "\\Data\\table3.csv")

In [None]:
#Batch 4: field rows 20000-24999 (by position), added to the rows already in table3
table4 = get_ahn_in_batches(fields,table3,20000,25000)

In [None]:
#Save the accumulated results of batches 1-4
table4.to_csv(directory + "\\Data\\table4.csv")

In [None]:
#Batch 5: field rows from 25000 up to (not including) run_until, added to the rows already in table4
table5 = get_ahn_in_batches(fields,table4,25000,run_until)

In [None]:
#Save the combined results of all batches as the AHN table
table5.to_csv(directory + "\\Data\\ahn.csv")

# 4) Extract Soil info